]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blame - tools/perf/builtin-trace.c
perf trace: Group per syscall arg formatter info into one struct
[mirror_ubuntu-focal-kernel.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
5ab8c689 24#include "util/event.h"
514f1c67 25#include "util/evlist.h"
4b6ab94e 26#include <subcmd/exec-cmd.h>
752fde44 27#include "util/machine.h"
9a3993d4 28#include "util/path.h"
6810fc91 29#include "util/session.h"
752fde44 30#include "util/thread.h"
4b6ab94e 31#include <subcmd/parse-options.h>
2ae3a312 32#include "util/strlist.h"
bdc89661 33#include "util/intlist.h"
514f1c67 34#include "util/thread_map.h"
bf2575c1 35#include "util/stat.h"
fd5cead2 36#include "trace/beauty/beauty.h"
97978b3e 37#include "trace-event.h"
9aca7f17 38#include "util/parse-events.h"
ba504235 39#include "util/bpf-loader.h"
566a0885 40#include "callchain.h"
fea01392 41#include "print_binary.h"
a067558e 42#include "string2.h"
fd0db102 43#include "syscalltbl.h"
96c14451 44#include "rb_resort.h"
514f1c67 45
a43783ae 46#include <errno.h>
fd20e811 47#include <inttypes.h>
fd0db102 48#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
4208735d 49#include <poll.h>
9607ad3a 50#include <signal.h>
514f1c67 51#include <stdlib.h>
017037ff 52#include <string.h>
8dd2a131 53#include <linux/err.h>
997bba8c
ACM
54#include <linux/filter.h>
55#include <linux/audit.h>
877a7a11 56#include <linux/kernel.h>
39878d49 57#include <linux/random.h>
c6d4a494 58#include <linux/stringify.h>
bd48c63e 59#include <linux/time64.h>
514f1c67 60
3d689ed6
ACM
61#include "sane_ctype.h"
62
c188e7ac
ACM
/* Supply O_CLOEXEC when none of the headers included above defined it. */
#if !defined(O_CLOEXEC)
#define O_CLOEXEC 02000000
#endif
66
83a51694
ACM
/* Supply F_LINUX_SPECIFIC_BASE when none of the headers included above defined it. */
#if !defined(F_LINUX_SPECIFIC_BASE)
#define F_LINUX_SPECIFIC_BASE 1024
#endif
70
d1d438a3
ACM
71struct trace {
72 struct perf_tool tool;
fd0db102 73 struct syscalltbl *sctbl;
d1d438a3
ACM
74 struct {
75 int max;
76 struct syscall *table;
77 struct {
78 struct perf_evsel *sys_enter,
79 *sys_exit;
80 } events;
81 } syscalls;
82 struct record_opts opts;
83 struct perf_evlist *evlist;
84 struct machine *host;
85 struct thread *current;
86 u64 base_time;
87 FILE *output;
88 unsigned long nr_events;
89 struct strlist *ev_qualifier;
90 struct {
91 size_t nr;
92 int *entries;
93 } ev_qualifier_ids;
d1d438a3
ACM
94 struct {
95 size_t nr;
96 pid_t *entries;
97 } filter_pids;
98 double duration_filter;
99 double runtime_ms;
100 struct {
101 u64 vfs_getname,
102 proc_getname;
103 } stats;
c6d4a494 104 unsigned int max_stack;
5cf9c84e 105 unsigned int min_stack;
d1d438a3
ACM
106 bool not_ev_qualifier;
107 bool live;
108 bool full_time;
109 bool sched;
110 bool multiple_threads;
111 bool summary;
112 bool summary_only;
113 bool show_comm;
114 bool show_tool_stats;
115 bool trace_syscalls;
44621819 116 bool kernel_syscallchains;
d1d438a3
ACM
117 bool force;
118 bool vfs_getname;
119 int trace_pgfaults;
fd0db102 120 int open_id;
d1d438a3 121};
a1c2552d 122
77170988
ACM
123struct tp_field {
124 int offset;
125 union {
126 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
127 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
128 };
129};
130
131#define TP_UINT_FIELD(bits) \
132static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
133{ \
55d43bca
DA
134 u##bits value; \
135 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136 return value; \
77170988
ACM
137}
138
139TP_UINT_FIELD(8);
140TP_UINT_FIELD(16);
141TP_UINT_FIELD(32);
142TP_UINT_FIELD(64);
143
144#define TP_UINT_FIELD__SWAPPED(bits) \
145static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
146{ \
55d43bca
DA
147 u##bits value; \
148 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
149 return bswap_##bits(value);\
150}
151
152TP_UINT_FIELD__SWAPPED(16);
153TP_UINT_FIELD__SWAPPED(32);
154TP_UINT_FIELD__SWAPPED(64);
155
156static int tp_field__init_uint(struct tp_field *field,
157 struct format_field *format_field,
158 bool needs_swap)
159{
160 field->offset = format_field->offset;
161
162 switch (format_field->size) {
163 case 1:
164 field->integer = tp_field__u8;
165 break;
166 case 2:
167 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
168 break;
169 case 4:
170 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
171 break;
172 case 8:
173 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
174 break;
175 default:
176 return -1;
177 }
178
179 return 0;
180}
181
182static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
183{
184 return sample->raw_data + field->offset;
185}
186
187static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
188{
189 field->offset = format_field->offset;
190 field->pointer = tp_field__ptr;
191 return 0;
192}
193
194struct syscall_tp {
195 struct tp_field id;
196 union {
197 struct tp_field args, ret;
198 };
199};
200
201static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
202 struct tp_field *field,
203 const char *name)
204{
205 struct format_field *format_field = perf_evsel__field(evsel, name);
206
207 if (format_field == NULL)
208 return -1;
209
210 return tp_field__init_uint(field, format_field, evsel->needs_swap);
211}
212
/*
 * Initialize member @name of the struct syscall_tp hanging off evsel->priv
 * from the tracepoint field with the same name.  Evaluates to the result of
 * perf_evsel__init_tp_uint_field().
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name)				\
({										\
	struct syscall_tp *sc = (evsel)->priv;					\
	perf_evsel__init_tp_uint_field(evsel, &sc->name, #name);		\
})
216
/*
 * Look up the tracepoint field @name on @evsel and initialize @field as a
 * pointer accessor for it.
 *
 * Returns -1 when the field is not found, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	return ff != NULL ? tp_field__init_ptr(field, ff) : -1;
}
228
/*
 * Pointer-accessor counterpart of the uint initializer: sets up member @name
 * of the struct syscall_tp in evsel->priv from the tracepoint field with the
 * same name.  Evaluates to the result of perf_evsel__init_tp_ptr_field().
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name)				\
({										\
	struct syscall_tp *sc = (evsel)->priv;					\
	perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name);			\
})
232
233static void perf_evsel__delete_priv(struct perf_evsel *evsel)
234{
04662523 235 zfree(&evsel->priv);
77170988
ACM
236 perf_evsel__delete(evsel);
237}
238
96695d44
NK
239static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
240{
241 evsel->priv = malloc(sizeof(struct syscall_tp));
242 if (evsel->priv != NULL) {
243 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
244 goto out_delete;
245
246 evsel->handler = handler;
247 return 0;
248 }
249
250 return -ENOMEM;
251
252out_delete:
04662523 253 zfree(&evsel->priv);
96695d44
NK
254 return -ENOENT;
255}
256
/*
 * Create the evsel for the raw_syscalls:<direction> tracepoint, falling back
 * to syscalls:<direction>, and initialize its syscall_tp private area with
 * @handler.
 *
 * Returns the new evsel, or NULL when neither tracepoint can be created or
 * the private area cannot be initialized.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler) == 0)
		return evsel;

	perf_evsel__delete_priv(evsel);
	return NULL;
}
277
/*
 * Read integer member @name of evsel->priv (a struct syscall_tp) from
 * @sample through the accessor installed for it.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample)				\
({										\
	struct syscall_tp *fields = (evsel)->priv;				\
	fields->name.integer(&fields->name, sample);				\
})
281
/*
 * Get a pointer to member @name of evsel->priv (a struct syscall_tp) inside
 * @sample through the accessor installed for it.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample)				\
({										\
	struct syscall_tp *fields = (evsel)->priv;				\
	fields->name.pointer(&fields->name, sample);				\
})
285
0ae79636
ACM
286size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
287{
288 int idx = val - sa->offset;
1f115cb7 289
0ae79636
ACM
290 if (idx < 0 || idx >= sa->nr_entries)
291 return scnprintf(bf, size, intfmt, val);
1f115cb7 292
0ae79636 293 return scnprintf(bf, size, "%s", sa->entries[idx]);
03e3adc9
ACM
294}
295
975b7c2f
ACM
296static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
297 const char *intfmt,
298 struct syscall_arg *arg)
1f115cb7 299{
0ae79636 300 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
1f115cb7
ACM
301}
302
975b7c2f
ACM
/* String-table formatter that prints unnamed values as decimal ("%d"). */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char decimal[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, decimal, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
310
83a51694
ACM
/* A list of string tables: @entries points at @nr_entries struct strarray pointers. */
struct strarrays {
	int		 nr_entries;
	struct strarray	 **entries;
};
315
/*
 * Define 'struct strarrays strarrays__<array>' wrapping @array, which must be
 * a real array (not a pointer) because its length comes from ARRAY_SIZE().
 */
#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
}
320
274e86fd
ACM
321size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
322 struct syscall_arg *arg)
83a51694
ACM
323{
324 struct strarrays *sas = arg->parm;
325 int i;
326
327 for (i = 0; i < sas->nr_entries; ++i) {
328 struct strarray *sa = sas->entries[i];
329 int idx = arg->val - sa->offset;
330
331 if (idx >= 0 && idx < sa->nr_entries) {
332 if (sa->entries[idx] == NULL)
333 break;
334 return scnprintf(bf, size, "%s", sa->entries[idx]);
335 }
336 }
337
338 return scnprintf(bf, size, "%d", arg->val);
339}
340
844ae5b4
ACM
341#if defined(__i386__) || defined(__x86_64__)
342/*
343 * FIXME: Make this available to all arches as soon as the ioctl beautifier
344 * gets rewritten to support all arches.
345 */
78645cf3
ACM
/* String-table formatter that prints unnamed values as hex ("%#x"). */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char hex[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, hex, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
844ae5b4 353#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 354
48e1f91a
ACM
/* Supply AT_FDCWD when none of the headers included above defined it. */
#if !defined(AT_FDCWD)
#define AT_FDCWD -100
#endif
358
75b757ca
ACM
359static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
360 struct syscall_arg *arg)
361{
362 int fd = arg->val;
363
364 if (fd == AT_FDCWD)
365 return scnprintf(bf, size, "CWD");
366
367 return syscall_arg__scnprintf_fd(bf, size, arg);
368}
369
370#define SCA_FDAT syscall_arg__scnprintf_fd_at
371
/* Forward declaration; the definition is not in this part of the file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
376
2c2b1623 377size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
13d4ff3e 378{
01533e97 379 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
380}
381
2c2b1623 382size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
a1c2552d
ACM
383{
384 return scnprintf(bf, size, "%d", arg->val);
385}
386
5dde91ed
ACM
387size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
388{
389 return scnprintf(bf, size, "%ld", arg->val);
390}
391
729a7841
ACM
/* Names for the first argument of bpf(), referenced from syscall_fmts[]. */
static const char *bpf_cmd[] = {
	"MAP_CREATE",
	"MAP_LOOKUP_ELEM",
	"MAP_UPDATE_ELEM",
	"MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY",
	"PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);
397
03e3adc9
ACM
398static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
399static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 400
1f115cb7
ACM
401static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
402static DEFINE_STRARRAY(itimers);
403
b62bee1b
ACM
/* Names for the 'option' argument of keyctl(). */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE",
	"CHOWN", "SETPERM", "DESCRIBE", "CLEAR",
	"LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);
412
efe6b882
ACM
/*
 * Names for the 'whence' argument of lseek().  "DATA" and "HOLE" are only
 * present when the build environment defines SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
f9da0b0c 422
80f587d5
ACM
/* Names for the generic fcntl() commands, starting at value 0. */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL",
	"SETFL", "GETLK", "SETLK", "SETLKW",
	"SETOWN", "GETOWN", "SETSIG", "GETSIG",
	"GETLK64", "SETLK64", "SETLKW64", "SETOWN_EX",
	"GETOWN_EX", "GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
430
83a51694
ACM
431static const char *fcntl_linux_specific_cmds[] = {
432 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
433 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
64e4561d 434 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
83a51694
ACM
435};
436
437static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
438
439static struct strarray *fcntl_cmds_arrays[] = {
440 &strarray__fcntl_cmds,
441 &strarray__fcntl_linux_specific_cmds,
442};
443
444static DEFINE_STRARRAYS(fcntl_cmds_arrays);
445
c045bf02
ACM
/* Names for the 'resource' argument of getrlimit()/setrlimit()/prlimit64(). */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK",
	"CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING",
	"MSGQUEUE", "NICE", "RTPRIO", "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* Names for the 'how' argument of rt_sigprocmask(). */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);
455
4f8c1b74
DA
/* Names for the 'clk_id' argument of clock_gettime(). */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID",
	"THREAD_CPUTIME_ID", "MONOTONIC_RAW", "REALTIME_COARSE",
	"MONOTONIC_COARSE", "BOOTTIME", "REALTIME_ALARM",
	"BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);
462
e10bce81
ACM
/* Names for the 'family' argument of socket()/socketpair(). */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX",
	"APPLETALK", "NETROM", "BRIDGE", "ATMPVC", "X25",
	"INET6", "ROSE", "DECnet", "NETBEUI", "SECURITY",
	"KEY", "NETLINK", "PACKET", "ASH", "ECONET",
	"ATMSVC", "RDS", "SNA", "IRDA", "PPPOX",
	"WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET",
	"IEEE802154", "CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
472
51108999
ACM
473static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
474 struct syscall_arg *arg)
475{
476 size_t printed = 0;
477 int mode = arg->val;
478
479 if (mode == F_OK) /* 0 */
480 return scnprintf(bf, size, "F");
481#define P_MODE(n) \
482 if (mode & n##_OK) { \
483 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
484 mode &= ~n##_OK; \
485 }
486
487 P_MODE(R);
488 P_MODE(W);
489 P_MODE(X);
490#undef P_MODE
491
492 if (mode)
493 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
494
495 return printed;
496}
497
498#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
499
f994592d
ACM
/* Forward declaration; the definition is not in this part of the file. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
504
46cce19b
ACM
505static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
506 struct syscall_arg *arg)
507{
508 int printed = 0, flags = arg->val;
509
510#define P_FLAG(n) \
511 if (flags & O_##n) { \
512 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
513 flags &= ~O_##n; \
514 }
515
516 P_FLAG(CLOEXEC);
517 P_FLAG(NONBLOCK);
518#undef P_FLAG
519
520 if (flags)
521 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
522
523 return printed;
524}
525
526#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
527
844ae5b4
ACM
528#if defined(__i386__) || defined(__x86_64__)
529/*
530 * FIXME: Make this available to all arches.
531 */
78645cf3
ACM
532#define TCGETS 0x5401
533
534static const char *tioctls[] = {
535 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
536 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
537 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
538 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
539 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
540 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
541 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
542 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
543 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
544 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
545 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
546 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
547 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
548 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
549 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
550};
551
552static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
844ae5b4 553#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 554
a355a61e
ACM
/* Supply the getrandom() flag values when the included headers lack them. */
#if !defined(GRND_NONBLOCK)
#define GRND_NONBLOCK 0x0001
#endif
#if !defined(GRND_RANDOM)
#define GRND_RANDOM 0x0002
#endif
561
39878d49
ACM
562static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
563 struct syscall_arg *arg)
564{
565 int printed = 0, flags = arg->val;
566
567#define P_FLAG(n) \
568 if (flags & GRND_##n) { \
569 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
570 flags &= ~GRND_##n; \
571 }
572
573 P_FLAG(RANDOM);
574 P_FLAG(NONBLOCK);
575#undef P_FLAG
576
577 if (flags)
578 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
579
580 return printed;
581}
582
583#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
584
82d4a110
ACM
/*
 * Initializer for a syscall_arg_fmt entry that prints through the string
 * table strarray__<array>.  @name is not expanded; it only labels the
 * argument at the point of use.
 */
#define STRARRAY(name, array) \
	{ .parm	     = &strarray__##array, \
	  .scnprintf = SCA_STRARRAY, }
453350dd 588
ea8dc3ce 589#include "trace/beauty/eventfd.c"
8bf382ce 590#include "trace/beauty/flock.c"
d5d71e86 591#include "trace/beauty/futex_op.c"
df4cb167 592#include "trace/beauty/mmap.c"
ba2f22cf 593#include "trace/beauty/mode_t.c"
a30e6259 594#include "trace/beauty/msg_flags.c"
8f48df69 595#include "trace/beauty/open_flags.c"
62de344e 596#include "trace/beauty/perf_event_open.c"
d5d71e86 597#include "trace/beauty/pid.c"
a3bca91f 598#include "trace/beauty/sched_policy.c"
f5cd95ea 599#include "trace/beauty/seccomp.c"
12199d8e 600#include "trace/beauty/signum.c"
bbf86c43 601#include "trace/beauty/socket_type.c"
7206b900 602#include "trace/beauty/waitid_options.c"
a3bca91f 603
82d4a110
ACM
/*
 * How to print one syscall argument: the formatter to call and an optional
 * formatter-specific parameter (e.g. a string table).
 */
struct syscall_arg_fmt {
	size_t	(*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void	*parm;
};
608
514f1c67
ACM
609static struct syscall_fmt {
610 const char *name;
aec1930b 611 const char *alias;
82d4a110 612 struct syscall_arg_fmt arg[6];
514f1c67 613 bool errmsg;
11c8e39f 614 bool errpid;
514f1c67 615 bool timeout;
04b34729 616 bool hexret;
514f1c67 617} syscall_fmts[] = {
51108999 618 { .name = "access", .errmsg = true,
82d4a110 619 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
aec1930b 620 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
82d4a110
ACM
621 { .name = "bpf", .errmsg = true,
622 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
beccb2b5 623 { .name = "brk", .hexret = true,
82d4a110 624 .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
12f3ca4f
ACM
625 { .name = "chdir", .errmsg = true, },
626 { .name = "chmod", .errmsg = true, },
627 { .name = "chroot", .errmsg = true, },
82d4a110
ACM
628 { .name = "clock_gettime", .errmsg = true,
629 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
11c8e39f 630 { .name = "clone", .errpid = true, },
75b757ca 631 { .name = "close", .errmsg = true,
82d4a110 632 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
a14bb860 633 { .name = "connect", .errmsg = true, },
12f3ca4f 634 { .name = "creat", .errmsg = true, },
b6565c90
ACM
635 { .name = "dup", .errmsg = true, },
636 { .name = "dup2", .errmsg = true, },
637 { .name = "dup3", .errmsg = true, },
82d4a110
ACM
638 { .name = "epoll_ctl", .errmsg = true,
639 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
49af9e93 640 { .name = "eventfd2", .errmsg = true,
82d4a110 641 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
12f3ca4f 642 { .name = "faccessat", .errmsg = true, },
b6565c90
ACM
643 { .name = "fadvise64", .errmsg = true, },
644 { .name = "fallocate", .errmsg = true, },
645 { .name = "fchdir", .errmsg = true, },
646 { .name = "fchmod", .errmsg = true, },
75b757ca 647 { .name = "fchmodat", .errmsg = true,
82d4a110 648 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
b6565c90 649 { .name = "fchown", .errmsg = true, },
75b757ca 650 { .name = "fchownat", .errmsg = true,
82d4a110 651 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
75b757ca 652 { .name = "fcntl", .errmsg = true,
82d4a110
ACM
653 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
654 .parm = &strarrays__fcntl_cmds_arrays, /* cmd */ },
655 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
b6565c90 656 { .name = "fdatasync", .errmsg = true, },
5cea6ff2 657 { .name = "flock", .errmsg = true,
82d4a110 658 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
b6565c90
ACM
659 { .name = "fsetxattr", .errmsg = true, },
660 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
12f3ca4f 661 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
b6565c90
ACM
662 { .name = "fstatfs", .errmsg = true, },
663 { .name = "fsync", .errmsg = true, },
664 { .name = "ftruncate", .errmsg = true, },
f9da0b0c 665 { .name = "futex", .errmsg = true,
82d4a110 666 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, }, },
75b757ca 667 { .name = "futimesat", .errmsg = true,
82d4a110 668 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
b6565c90
ACM
669 { .name = "getdents", .errmsg = true, },
670 { .name = "getdents64", .errmsg = true, },
82d4a110
ACM
671 { .name = "getitimer", .errmsg = true,
672 .arg = { [0] = STRARRAY(which, itimers), }, },
c65f1070 673 { .name = "getpid", .errpid = true, },
d1d438a3 674 { .name = "getpgid", .errpid = true, },
c65f1070 675 { .name = "getppid", .errpid = true, },
39878d49 676 { .name = "getrandom", .errmsg = true,
82d4a110
ACM
677 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
678 { .name = "getrlimit", .errmsg = true,
679 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
12f3ca4f 680 { .name = "getxattr", .errmsg = true, },
82d4a110 681 { .name = "inotify_add_watch", .errmsg = true, },
beccb2b5 682 { .name = "ioctl", .errmsg = true,
82d4a110 683 .arg = {
844ae5b4
ACM
684#if defined(__i386__) || defined(__x86_64__)
685/*
686 * FIXME: Make this available to all arches.
687 */
82d4a110
ACM
688 [1] = { .scnprintf = SCA_STRHEXARRAY, /* cmd */
689 .parm = &strarray__tioctls, },
690 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 691#else
82d4a110 692 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 693#endif
82d4a110
ACM
694 { .name = "keyctl", .errmsg = true,
695 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
8bad5b0a 696 { .name = "kill", .errmsg = true,
82d4a110 697 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
12f3ca4f
ACM
698 { .name = "lchown", .errmsg = true, },
699 { .name = "lgetxattr", .errmsg = true, },
75b757ca 700 { .name = "linkat", .errmsg = true,
82d4a110 701 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
12f3ca4f
ACM
702 { .name = "listxattr", .errmsg = true, },
703 { .name = "llistxattr", .errmsg = true, },
704 { .name = "lremovexattr", .errmsg = true, },
75b757ca 705 { .name = "lseek", .errmsg = true,
82d4a110 706 .arg = { [2] = STRARRAY(whence, whences), }, },
12f3ca4f
ACM
707 { .name = "lsetxattr", .errmsg = true, },
708 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
709 { .name = "lsxattr", .errmsg = true, },
9e9716d1 710 { .name = "madvise", .errmsg = true,
82d4a110
ACM
711 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
712 [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
12f3ca4f 713 { .name = "mkdir", .errmsg = true, },
75b757ca 714 { .name = "mkdirat", .errmsg = true,
82d4a110 715 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
12f3ca4f 716 { .name = "mknod", .errmsg = true, },
75b757ca 717 { .name = "mknodat", .errmsg = true,
82d4a110 718 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
3d903aa7 719 { .name = "mlock", .errmsg = true,
82d4a110 720 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
3d903aa7 721 { .name = "mlockall", .errmsg = true,
82d4a110 722 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
beccb2b5 723 { .name = "mmap", .hexret = true,
54265664
JO
724/* The standard mmap maps to old_mmap on s390x */
725#if defined(__s390x__)
726 .alias = "old_mmap",
727#endif
82d4a110
ACM
728 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
729 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
730 [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
beccb2b5 731 { .name = "mprotect", .errmsg = true,
82d4a110
ACM
732 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
733 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
090389b6 734 { .name = "mq_unlink", .errmsg = true,
82d4a110 735 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
ae685380 736 { .name = "mremap", .hexret = true,
82d4a110
ACM
737 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
738 [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
739 [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
3d903aa7 740 { .name = "munlock", .errmsg = true,
82d4a110 741 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
beccb2b5 742 { .name = "munmap", .errmsg = true,
82d4a110 743 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
75b757ca 744 { .name = "name_to_handle_at", .errmsg = true,
82d4a110 745 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
75b757ca 746 { .name = "newfstatat", .errmsg = true,
82d4a110 747 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
be65a89a 748 { .name = "open", .errmsg = true,
82d4a110 749 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
31cd3855 750 { .name = "open_by_handle_at", .errmsg = true,
82d4a110
ACM
751 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
752 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
31cd3855 753 { .name = "openat", .errmsg = true,
82d4a110
ACM
754 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
755 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
a1c2552d 756 { .name = "perf_event_open", .errmsg = true,
82d4a110
ACM
757 .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ },
758 [3] = { .scnprintf = SCA_FD, /* group_fd */ },
759 [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
46cce19b 760 { .name = "pipe2", .errmsg = true,
82d4a110 761 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
aec1930b
ACM
762 { .name = "poll", .errmsg = true, .timeout = true, },
763 { .name = "ppoll", .errmsg = true, .timeout = true, },
b6565c90
ACM
764 { .name = "pread", .errmsg = true, .alias = "pread64", },
765 { .name = "preadv", .errmsg = true, .alias = "pread", },
82d4a110
ACM
766 { .name = "prlimit64", .errmsg = true,
767 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
b6565c90
ACM
768 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
769 { .name = "pwritev", .errmsg = true, },
770 { .name = "read", .errmsg = true, },
12f3ca4f 771 { .name = "readlink", .errmsg = true, },
75b757ca 772 { .name = "readlinkat", .errmsg = true,
82d4a110 773 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
b6565c90 774 { .name = "readv", .errmsg = true, },
b2cc99fd 775 { .name = "recvfrom", .errmsg = true,
82d4a110 776 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
b2cc99fd 777 { .name = "recvmmsg", .errmsg = true,
82d4a110 778 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
b2cc99fd 779 { .name = "recvmsg", .errmsg = true,
82d4a110 780 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
12f3ca4f 781 { .name = "removexattr", .errmsg = true, },
75b757ca 782 { .name = "renameat", .errmsg = true,
82d4a110 783 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
12f3ca4f 784 { .name = "rmdir", .errmsg = true, },
8bad5b0a 785 { .name = "rt_sigaction", .errmsg = true,
82d4a110
ACM
786 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
787 { .name = "rt_sigprocmask", .errmsg = true,
788 .arg = { [0] = STRARRAY(how, sighow), }, },
8bad5b0a 789 { .name = "rt_sigqueueinfo", .errmsg = true,
82d4a110 790 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
8bad5b0a 791 { .name = "rt_tgsigqueueinfo", .errmsg = true,
82d4a110 792 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
f0bbd602
ACM
793 { .name = "sched_getattr", .errmsg = true, },
794 { .name = "sched_setattr", .errmsg = true, },
a3bca91f 795 { .name = "sched_setscheduler", .errmsg = true,
82d4a110 796 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
997bba8c 797 { .name = "seccomp", .errmsg = true,
82d4a110
ACM
798 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
799 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
aec1930b 800 { .name = "select", .errmsg = true, .timeout = true, },
b2cc99fd 801 { .name = "sendmmsg", .errmsg = true,
82d4a110 802 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
b2cc99fd 803 { .name = "sendmsg", .errmsg = true,
82d4a110 804 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
b2cc99fd 805 { .name = "sendto", .errmsg = true,
82d4a110 806 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
c65f1070 807 { .name = "set_tid_address", .errpid = true, },
82d4a110
ACM
808 { .name = "setitimer", .errmsg = true,
809 .arg = { [0] = STRARRAY(which, itimers), }, },
d1d438a3 810 { .name = "setpgid", .errmsg = true, },
82d4a110
ACM
811 { .name = "setrlimit", .errmsg = true,
812 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
12f3ca4f 813 { .name = "setxattr", .errmsg = true, },
b6565c90 814 { .name = "shutdown", .errmsg = true, },
e10bce81 815 { .name = "socket", .errmsg = true,
82d4a110
ACM
816 .arg = { [0] = STRARRAY(family, socket_families),
817 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
07120aa5 818 { .name = "socketpair", .errmsg = true,
82d4a110
ACM
819 .arg = { [0] = STRARRAY(family, socket_families),
820 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
12f3ca4f
ACM
821 { .name = "stat", .errmsg = true, .alias = "newstat", },
822 { .name = "statfs", .errmsg = true, },
fd5cead2 823 { .name = "statx", .errmsg = true,
82d4a110
ACM
824 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ },
825 [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
826 [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
34221118 827 { .name = "swapoff", .errmsg = true,
82d4a110 828 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
34221118 829 { .name = "swapon", .errmsg = true,
82d4a110 830 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
75b757ca 831 { .name = "symlinkat", .errmsg = true,
82d4a110 832 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
8bad5b0a 833 { .name = "tgkill", .errmsg = true,
82d4a110 834 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
8bad5b0a 835 { .name = "tkill", .errmsg = true,
82d4a110 836 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
12f3ca4f 837 { .name = "truncate", .errmsg = true, },
e5959683 838 { .name = "uname", .errmsg = true, .alias = "newuname", },
75b757ca 839 { .name = "unlinkat", .errmsg = true,
82d4a110 840 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
12f3ca4f 841 { .name = "utime", .errmsg = true, },
75b757ca 842 { .name = "utimensat", .errmsg = true,
82d4a110 843 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
12f3ca4f 844 { .name = "utimes", .errmsg = true, },
b6565c90 845 { .name = "vmsplice", .errmsg = true, },
11c8e39f 846 { .name = "wait4", .errpid = true,
82d4a110 847 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
11c8e39f 848 { .name = "waitid", .errpid = true,
82d4a110 849 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
b6565c90
ACM
850 { .name = "write", .errmsg = true, },
851 { .name = "writev", .errmsg = true, },
514f1c67
ACM
852};
853
854static int syscall_fmt__cmp(const void *name, const void *fmtp)
855{
856 const struct syscall_fmt *fmt = fmtp;
857 return strcmp(name, fmt->name);
858}
859
860static struct syscall_fmt *syscall_fmt__find(const char *name)
861{
862 const int nmemb = ARRAY_SIZE(syscall_fmts);
863 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
864}
865
/*
 * What is known about one syscall, one entry per syscall id in
 * trace->syscalls.table, filled in by trace__read_syscall_info().
 */
struct syscall {
	struct event_format	*tp_format;	/* syscalls:sys_enter_NAME format, may be an ERR_PTR */
	int			nr_args;	/* number of fields in ->args */
	struct format_field	*args;		/* tracepoint fields, minus the syscall number one */
	const char		*name;
	bool			is_exit;	/* "exit" or "exit_group" */
	struct syscall_fmt	*fmt;		/* syscall_fmts[] entry, NULL if there is none */
	struct syscall_arg_fmt	*arg_fmt;	/* per argument formatters, see syscall__set_arg_fmts() */
};
875
fd2b2975
ACM
876/*
877 * We need to have this 'calculated' boolean because in some cases we really
878 * don't know what is the duration of a syscall, for instance, when we start
879 * a session and some threads are waiting for a syscall to finish, say 'poll',
880 * in which case all we can do is to print "( ? ) for duration and for the
881 * start timestamp.
882 */
883static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
884{
885 double duration = (double)t / NSEC_PER_MSEC;
886 size_t printed = fprintf(fp, "(");
887
fd2b2975
ACM
888 if (!calculated)
889 printed += fprintf(fp, " ? ");
890 else if (duration >= 1.0)
60c907ab
ACM
891 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
892 else if (duration >= 0.01)
893 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
894 else
895 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 896 return printed + fprintf(fp, "): ");
60c907ab
ACM
897}
898
f994592d
ACM
899/**
900 * filename.ptr: The filename char pointer that will be vfs_getname'd
901 * filename.entry_str_pos: Where to insert the string translated from
902 * filename.ptr by the vfs_getname tracepoint/kprobe.
84486caa
ACM
903 * ret_scnprintf: syscall args may set this to a different syscall return
904 * formatter, for instance, fcntl may return fds, file flags, etc.
f994592d 905 */
752fde44
ACM
906struct thread_trace {
907 u64 entry_time;
752fde44 908 bool entry_pending;
efd5745e 909 unsigned long nr_events;
a2ea67d7 910 unsigned long pfmaj, pfmin;
752fde44 911 char *entry_str;
1302d88e 912 double runtime_ms;
7ee57434 913 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
f994592d
ACM
914 struct {
915 unsigned long ptr;
7f4f8001
ACM
916 short int entry_str_pos;
917 bool pending_open;
918 unsigned int namelen;
919 char *name;
f994592d 920 } filename;
75b757ca
ACM
921 struct {
922 int max;
923 char **table;
924 } paths;
bf2575c1
DA
925
926 struct intlist *syscall_stats;
752fde44
ACM
927};
928
929static struct thread_trace *thread_trace__new(void)
930{
75b757ca
ACM
931 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
932
933 if (ttrace)
934 ttrace->paths.max = -1;
935
bf2575c1
DA
936 ttrace->syscall_stats = intlist__new(NULL);
937
75b757ca 938 return ttrace;
752fde44
ACM
939}
940
c24ff998 941static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 942{
efd5745e
ACM
943 struct thread_trace *ttrace;
944
752fde44
ACM
945 if (thread == NULL)
946 goto fail;
947
89dceb22
NK
948 if (thread__priv(thread) == NULL)
949 thread__set_priv(thread, thread_trace__new());
48000a1a 950
89dceb22 951 if (thread__priv(thread) == NULL)
752fde44
ACM
952 goto fail;
953
89dceb22 954 ttrace = thread__priv(thread);
efd5745e
ACM
955 ++ttrace->nr_events;
956
957 return ttrace;
752fde44 958fail:
c24ff998 959 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
960 "WARNING: not enough memory, dropping samples!\n");
961 return NULL;
962}
963
84486caa
ACM
964
965void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
7ee57434 966 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
84486caa
ACM
967{
968 struct thread_trace *ttrace = thread__priv(arg->thread);
969
970 ttrace->ret_scnprintf = ret_scnprintf;
971}
972
598d02c5
SF
973#define TRACE_PFMAJ (1 << 0)
974#define TRACE_PFMIN (1 << 1)
975
e4d44e83
ACM
976static const size_t trace__entry_str_size = 2048;
977
97119f37 978static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 979{
89dceb22 980 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
981
982 if (fd > ttrace->paths.max) {
983 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
984
985 if (npath == NULL)
986 return -1;
987
988 if (ttrace->paths.max != -1) {
989 memset(npath + ttrace->paths.max + 1, 0,
990 (fd - ttrace->paths.max) * sizeof(char *));
991 } else {
992 memset(npath, 0, (fd + 1) * sizeof(char *));
993 }
994
995 ttrace->paths.table = npath;
996 ttrace->paths.max = fd;
997 }
998
999 ttrace->paths.table[fd] = strdup(pathname);
1000
1001 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1002}
1003
97119f37
ACM
1004static int thread__read_fd_path(struct thread *thread, int fd)
1005{
1006 char linkname[PATH_MAX], pathname[PATH_MAX];
1007 struct stat st;
1008 int ret;
1009
1010 if (thread->pid_ == thread->tid) {
1011 scnprintf(linkname, sizeof(linkname),
1012 "/proc/%d/fd/%d", thread->pid_, fd);
1013 } else {
1014 scnprintf(linkname, sizeof(linkname),
1015 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1016 }
1017
1018 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1019 return -1;
1020
1021 ret = readlink(linkname, pathname, sizeof(pathname));
1022
1023 if (ret < 0 || ret > st.st_size)
1024 return -1;
1025
1026 pathname[ret] = '\0';
1027 return trace__set_fd_pathname(thread, fd, pathname);
1028}
1029
c522739d
ACM
1030static const char *thread__fd_path(struct thread *thread, int fd,
1031 struct trace *trace)
75b757ca 1032{
89dceb22 1033 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
1034
1035 if (ttrace == NULL)
1036 return NULL;
1037
1038 if (fd < 0)
1039 return NULL;
1040
cdcd1e6b 1041 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
1042 if (!trace->live)
1043 return NULL;
1044 ++trace->stats.proc_getname;
cdcd1e6b 1045 if (thread__read_fd_path(thread, fd))
c522739d
ACM
1046 return NULL;
1047 }
75b757ca
ACM
1048
1049 return ttrace->paths.table[fd];
1050}
1051
fc65eb82 1052size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
75b757ca
ACM
1053{
1054 int fd = arg->val;
1055 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 1056 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
1057
1058 if (path)
1059 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1060
1061 return printed;
1062}
1063
1064static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1065 struct syscall_arg *arg)
1066{
1067 int fd = arg->val;
1068 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1069 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1070
04662523
ACM
1071 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1072 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1073
1074 return printed;
1075}
1076
f994592d
ACM
1077static void thread__set_filename_pos(struct thread *thread, const char *bf,
1078 unsigned long ptr)
1079{
1080 struct thread_trace *ttrace = thread__priv(thread);
1081
1082 ttrace->filename.ptr = ptr;
1083 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1084}
1085
1086static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1087 struct syscall_arg *arg)
1088{
1089 unsigned long ptr = arg->val;
1090
1091 if (!arg->trace->vfs_getname)
1092 return scnprintf(bf, size, "%#x", ptr);
1093
1094 thread__set_filename_pos(arg->thread, bf, ptr);
1095 return 0;
1096}
1097
ae9ed035
ACM
1098static bool trace__filter_duration(struct trace *trace, double t)
1099{
1100 return t < (trace->duration_filter * NSEC_PER_MSEC);
1101}
1102
fd2b2975 1103static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1104{
1105 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1106
60c907ab 1107 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1108}
1109
fd2b2975
ACM
1110/*
1111 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1112 * using ttrace->entry_time for a thread that receives a sys_exit without
1113 * first having received a sys_enter ("poll" issued before tracing session
1114 * starts, lost sys_enter exit due to ring buffer overflow).
1115 */
1116static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1117{
1118 if (tstamp > 0)
1119 return __trace__fprintf_tstamp(trace, tstamp, fp);
1120
1121 return fprintf(fp, " ? ");
1122}
1123
/*
 * Flags set from the signal handler and read by the rest of the tool.
 *
 * volatile sig_atomic_t is the only object type ISO C guarantees can be
 * safely written from an asynchronous signal handler, and the volatile
 * qualifier keeps the compiler from caching the value across reads.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Ask for the session to finish; 'interrupted' tells whether that was
 * requested via SIGINT as opposed to some other signal.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1132
752fde44 1133static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
fd2b2975 1134 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
752fde44
ACM
1135{
1136 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
fd2b2975 1137 printed += fprintf_duration(duration, duration_calculated, fp);
752fde44 1138
50c95cbd
ACM
1139 if (trace->multiple_threads) {
1140 if (trace->show_comm)
1902efe7 1141 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1142 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1143 }
752fde44
ACM
1144
1145 return printed;
1146}
1147
c24ff998 1148static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1149 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1150{
1151 int ret = 0;
1152
1153 switch (event->header.type) {
1154 case PERF_RECORD_LOST:
c24ff998 1155 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1156 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1157 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1158 break;
752fde44 1159 default:
162f0bef 1160 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1161 break;
1162 }
1163
1164 return ret;
1165}
1166
c24ff998 1167static int trace__tool_process(struct perf_tool *tool,
752fde44 1168 union perf_event *event,
162f0bef 1169 struct perf_sample *sample,
752fde44
ACM
1170 struct machine *machine)
1171{
c24ff998 1172 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1173 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1174}
1175
caf8a0d0
ACM
1176static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1177{
1178 struct machine *machine = vmachine;
1179
1180 if (machine->kptr_restrict_warned)
1181 return NULL;
1182
1183 if (symbol_conf.kptr_restrict) {
1184 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1185 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1186 "Kernel samples will not be resolved.\n");
1187 machine->kptr_restrict_warned = true;
1188 return NULL;
1189 }
1190
1191 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1192}
1193
752fde44
ACM
1194static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1195{
0a7e6d1b 1196 int err = symbol__init(NULL);
752fde44
ACM
1197
1198 if (err)
1199 return err;
1200
8fb598e5
DA
1201 trace->host = machine__new_host();
1202 if (trace->host == NULL)
1203 return -ENOMEM;
752fde44 1204
caf8a0d0 1205 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
706c3da4
ACM
1206 return -errno;
1207
a33fbd56 1208 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76
KL
1209 evlist->threads, trace__tool_process, false,
1210 trace->opts.proc_map_timeout);
752fde44
ACM
1211 if (err)
1212 symbol__exit();
1213
1214 return err;
1215}
1216
13d4ff3e
ACM
1217static int syscall__set_arg_fmts(struct syscall *sc)
1218{
1219 struct format_field *field;
b6565c90 1220 int idx = 0, len;
13d4ff3e 1221
82d4a110
ACM
1222 sc->arg_fmt = calloc(sc->nr_args, sizeof(*sc->arg_fmt));
1223 if (sc->arg_fmt == NULL)
13d4ff3e
ACM
1224 return -1;
1225
82d4a110
ACM
1226 for (field = sc->args; field; field = field->next, ++idx) {
1227 if (sc->fmt) {
1228 sc->arg_fmt[idx] = sc->fmt->arg[idx];
1229
1230 if (sc->fmt->arg[idx].scnprintf)
1231 continue;
1232 }
1f115cb7 1233
82d4a110 1234 if (strcmp(field->type, "const char *") == 0 &&
12f3ca4f
ACM
1235 (strcmp(field->name, "filename") == 0 ||
1236 strcmp(field->name, "path") == 0 ||
1237 strcmp(field->name, "pathname") == 0))
82d4a110 1238 sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
beccb2b5 1239 else if (field->flags & FIELD_IS_POINTER)
82d4a110 1240 sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
d1d438a3 1241 else if (strcmp(field->type, "pid_t") == 0)
82d4a110 1242 sc->arg_fmt[idx].scnprintf = SCA_PID;
ba2f22cf 1243 else if (strcmp(field->type, "umode_t") == 0)
82d4a110 1244 sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
b6565c90
ACM
1245 else if ((strcmp(field->type, "int") == 0 ||
1246 strcmp(field->type, "unsigned int") == 0 ||
1247 strcmp(field->type, "long") == 0) &&
1248 (len = strlen(field->name)) >= 2 &&
1249 strcmp(field->name + len - 2, "fd") == 0) {
1250 /*
1251 * /sys/kernel/tracing/events/syscalls/sys_enter*
1252 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1253 * 65 int
1254 * 23 unsigned int
1255 * 7 unsigned long
1256 */
82d4a110 1257 sc->arg_fmt[idx].scnprintf = SCA_FD;
b6565c90 1258 }
13d4ff3e
ACM
1259 }
1260
1261 return 0;
1262}
1263
514f1c67
ACM
1264static int trace__read_syscall_info(struct trace *trace, int id)
1265{
1266 char tp_name[128];
1267 struct syscall *sc;
fd0db102 1268 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1269
1270 if (name == NULL)
1271 return -1;
514f1c67
ACM
1272
1273 if (id > trace->syscalls.max) {
1274 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1275
1276 if (nsyscalls == NULL)
1277 return -1;
1278
1279 if (trace->syscalls.max != -1) {
1280 memset(nsyscalls + trace->syscalls.max + 1, 0,
1281 (id - trace->syscalls.max) * sizeof(*sc));
1282 } else {
1283 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1284 }
1285
1286 trace->syscalls.table = nsyscalls;
1287 trace->syscalls.max = id;
1288 }
1289
1290 sc = trace->syscalls.table + id;
3a531260 1291 sc->name = name;
2ae3a312 1292
3a531260 1293 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1294
aec1930b 1295 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1296 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1297
8dd2a131 1298 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1299 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1300 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1301 }
514f1c67 1302
8dd2a131 1303 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1304 return -1;
1305
f208bd8d
ACM
1306 sc->args = sc->tp_format->format.fields;
1307 sc->nr_args = sc->tp_format->format.nr_fields;
c42de706
TS
1308 /*
1309 * We need to check and discard the first variable '__syscall_nr'
1310 * or 'nr' that mean the syscall number. It is needless here.
1311 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1312 */
1313 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1314 sc->args = sc->args->next;
1315 --sc->nr_args;
1316 }
1317
5089f20e
ACM
1318 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1319
13d4ff3e 1320 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1321}
1322
d0cc439b
ACM
1323static int trace__validate_ev_qualifier(struct trace *trace)
1324{
8b3ce757 1325 int err = 0, i;
d0cc439b
ACM
1326 struct str_node *pos;
1327
8b3ce757
ACM
1328 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1329 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1330 sizeof(trace->ev_qualifier_ids.entries[0]));
1331
1332 if (trace->ev_qualifier_ids.entries == NULL) {
1333 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1334 trace->output);
1335 err = -EINVAL;
1336 goto out;
1337 }
1338
1339 i = 0;
1340
602a1f4d 1341 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1342 const char *sc = pos->s;
fd0db102 1343 int id = syscalltbl__id(trace->sctbl, sc);
d0cc439b 1344
8b3ce757 1345 if (id < 0) {
d0cc439b
ACM
1346 if (err == 0) {
1347 fputs("Error:\tInvalid syscall ", trace->output);
1348 err = -EINVAL;
1349 } else {
1350 fputs(", ", trace->output);
1351 }
1352
1353 fputs(sc, trace->output);
1354 }
8b3ce757
ACM
1355
1356 trace->ev_qualifier_ids.entries[i++] = id;
d0cc439b
ACM
1357 }
1358
1359 if (err < 0) {
1360 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1361 "\nHint:\tand: 'man syscalls'\n", trace->output);
8b3ce757
ACM
1362 zfree(&trace->ev_qualifier_ids.entries);
1363 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1364 }
8b3ce757 1365out:
d0cc439b
ACM
1366 return err;
1367}
1368
55d43bca
DA
1369/*
1370 * args is to be interpreted as a series of longs but we need to handle
1371 * 8-byte unaligned accesses. args points to raw_data within the event
1372 * and raw_data is guaranteed to be 8-byte unaligned because it is
1373 * preceded by raw_size which is a u32. So we need to copy args to a temp
1374 * variable to read it. Most notably this avoids extended load instructions
1375 * on unaligned addresses
1376 */
f9f83b33
ACM
1377static unsigned long __syscall_arg__val(unsigned char *args, u8 idx)
1378{
1379 unsigned long val;
1380 unsigned char *p = args + sizeof(unsigned long) * idx;
1381
1382 memcpy(&val, p, sizeof(val));
1383 return val;
1384}
1385
1386unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
1387{
1388 return __syscall_arg__val(arg->args, idx);
1389}
55d43bca 1390
752fde44 1391static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1392 unsigned char *args, struct trace *trace,
75b757ca 1393 struct thread *thread)
514f1c67 1394{
514f1c67 1395 size_t printed = 0;
55d43bca 1396 unsigned long val;
84486caa
ACM
1397 struct thread_trace *ttrace = thread__priv(thread);
1398
1399 /*
1400 * Things like fcntl will set this in its 'cmd' formatter to pick the
1401 * right formatter for the return value (an fd? file flags?), which is
1402 * not needed for syscalls that always return a given type, say an fd.
1403 */
1404 ttrace->ret_scnprintf = NULL;
514f1c67 1405
f208bd8d 1406 if (sc->args != NULL) {
514f1c67 1407 struct format_field *field;
01533e97
ACM
1408 u8 bit = 1;
1409 struct syscall_arg arg = {
f9f83b33 1410 .args = args,
75b757ca
ACM
1411 .idx = 0,
1412 .mask = 0,
1413 .trace = trace,
1414 .thread = thread,
01533e97 1415 };
6e7eeb51 1416
f208bd8d 1417 for (field = sc->args; field;
01533e97
ACM
1418 field = field->next, ++arg.idx, bit <<= 1) {
1419 if (arg.mask & bit)
6e7eeb51 1420 continue;
55d43bca 1421
f9f83b33 1422 val = syscall_arg__val(&arg, arg.idx);
55d43bca 1423
4aa58232
ACM
1424 /*
1425 * Suppress this argument if its value is zero and
1426 * and we don't have a string associated in an
1427 * strarray for it.
1428 */
55d43bca 1429 if (val == 0 &&
82d4a110
ACM
1430 !(sc->arg_fmt &&
1431 (sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
1432 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
1433 sc->arg_fmt[arg.idx].parm))
22ae5cf1
ACM
1434 continue;
1435
752fde44 1436 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1437 "%s%s: ", printed ? ", " : "", field->name);
82d4a110 1438 if (sc->arg_fmt && sc->arg_fmt[arg.idx].scnprintf) {
55d43bca 1439 arg.val = val;
82d4a110
ACM
1440 if (sc->arg_fmt[arg.idx].parm)
1441 arg.parm = sc->arg_fmt[arg.idx].parm;
1442 printed += sc->arg_fmt[arg.idx].scnprintf(bf + printed, size - printed, &arg);
6e7eeb51 1443 } else {
13d4ff3e 1444 printed += scnprintf(bf + printed, size - printed,
55d43bca 1445 "%ld", val);
6e7eeb51 1446 }
514f1c67 1447 }
4c4d6e51
ACM
1448 } else if (IS_ERR(sc->tp_format)) {
1449 /*
1450 * If we managed to read the tracepoint /format file, then we
1451 * may end up not having any args, like with gettid(), so only
1452 * print the raw args when we didn't manage to read it.
1453 */
01533e97
ACM
1454 int i = 0;
1455
514f1c67 1456 while (i < 6) {
f9f83b33 1457 val = __syscall_arg__val(args, i);
752fde44
ACM
1458 printed += scnprintf(bf + printed, size - printed,
1459 "%sarg%d: %ld",
55d43bca 1460 printed ? ", " : "", i, val);
514f1c67
ACM
1461 ++i;
1462 }
1463 }
1464
1465 return printed;
1466}
1467
/*
 * Signature shared by the per tracepoint handlers in this file, such as
 * trace__sys_enter(), trace__sys_exit() and trace__vfs_getname().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1471
1472static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1473 struct perf_evsel *evsel, int id)
ba3d7dee 1474{
ba3d7dee
ACM
1475
1476 if (id < 0) {
adaa18bf
ACM
1477
1478 /*
1479 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1480 * before that, leaving at a higher verbosity level till that is
1481 * explained. Reproduced with plain ftrace with:
1482 *
1483 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1484 * grep "NR -1 " /t/trace_pipe
1485 *
1486 * After generating some load on the machine.
1487 */
1488 if (verbose > 1) {
1489 static u64 n;
1490 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1491 id, perf_evsel__name(evsel), ++n);
1492 }
ba3d7dee
ACM
1493 return NULL;
1494 }
1495
1496 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1497 trace__read_syscall_info(trace, id))
1498 goto out_cant_read;
1499
1500 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1501 goto out_cant_read;
1502
1503 return &trace->syscalls.table[id];
1504
1505out_cant_read:
bb963e16 1506 if (verbose > 0) {
7c304ee0
ACM
1507 fprintf(trace->output, "Problems reading syscall %d", id);
1508 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1509 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1510 fputs(" information\n", trace->output);
1511 }
ba3d7dee
ACM
1512 return NULL;
1513}
1514
bf2575c1
DA
1515static void thread__update_stats(struct thread_trace *ttrace,
1516 int id, struct perf_sample *sample)
1517{
1518 struct int_node *inode;
1519 struct stats *stats;
1520 u64 duration = 0;
1521
1522 inode = intlist__findnew(ttrace->syscall_stats, id);
1523 if (inode == NULL)
1524 return;
1525
1526 stats = inode->priv;
1527 if (stats == NULL) {
1528 stats = malloc(sizeof(struct stats));
1529 if (stats == NULL)
1530 return;
1531 init_stats(stats);
1532 inode->priv = stats;
1533 }
1534
1535 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1536 duration = sample->time - ttrace->entry_time;
1537
1538 update_stats(stats, duration);
1539}
1540
e596663e
ACM
1541static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1542{
1543 struct thread_trace *ttrace;
1544 u64 duration;
1545 size_t printed;
1546
1547 if (trace->current == NULL)
1548 return 0;
1549
1550 ttrace = thread__priv(trace->current);
1551
1552 if (!ttrace->entry_pending)
1553 return 0;
1554
1555 duration = sample->time - ttrace->entry_time;
1556
fd2b2975 1557 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
e596663e
ACM
1558 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1559 ttrace->entry_pending = false;
1560
1561 return printed;
1562}
1563
ba3d7dee 1564static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1565 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1566 struct perf_sample *sample)
1567{
752fde44 1568 char *msg;
ba3d7dee 1569 void *args;
752fde44 1570 size_t printed = 0;
2ae3a312 1571 struct thread *thread;
b91fc39f 1572 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1573 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1574 struct thread_trace *ttrace;
1575
1576 if (sc == NULL)
1577 return -1;
ba3d7dee 1578
8fb598e5 1579 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1580 ttrace = thread__trace(thread, trace->output);
2ae3a312 1581 if (ttrace == NULL)
b91fc39f 1582 goto out_put;
ba3d7dee 1583
77170988 1584 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1585
1586 if (ttrace->entry_str == NULL) {
e4d44e83 1587 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1588 if (!ttrace->entry_str)
b91fc39f 1589 goto out_put;
752fde44
ACM
1590 }
1591
5cf9c84e 1592 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1593 trace__printf_interrupted_entry(trace, sample);
e596663e 1594
752fde44
ACM
1595 ttrace->entry_time = sample->time;
1596 msg = ttrace->entry_str;
e4d44e83 1597 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1598
e4d44e83 1599 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1600 args, trace, thread);
752fde44 1601
5089f20e 1602 if (sc->is_exit) {
5cf9c84e 1603 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
fd2b2975 1604 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1605 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1606 }
7f4f8001 1607 } else {
752fde44 1608 ttrace->entry_pending = true;
7f4f8001
ACM
1609 /* See trace__vfs_getname & trace__sys_exit */
1610 ttrace->filename.pending_open = false;
1611 }
ba3d7dee 1612
f3b623b8
ACM
1613 if (trace->current != thread) {
1614 thread__put(trace->current);
1615 trace->current = thread__get(thread);
1616 }
b91fc39f
ACM
1617 err = 0;
1618out_put:
1619 thread__put(thread);
1620 return err;
ba3d7dee
ACM
1621}
1622
5cf9c84e
ACM
1623static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1624 struct perf_sample *sample,
1625 struct callchain_cursor *cursor)
202ff968
ACM
1626{
1627 struct addr_location al;
5cf9c84e
ACM
1628
1629 if (machine__resolve(trace->host, &al, sample) < 0 ||
1630 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1631 return -1;
1632
1633 return 0;
1634}
1635
1636static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1637{
202ff968 1638 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1639 const unsigned int print_opts = EVSEL__PRINT_SYM |
1640 EVSEL__PRINT_DSO |
1641 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1642
d327e60c 1643 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1644}
1645
ba3d7dee 1646static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1647 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1648 struct perf_sample *sample)
1649{
2c82c3ad 1650 long ret;
60c907ab 1651 u64 duration = 0;
fd2b2975 1652 bool duration_calculated = false;
2ae3a312 1653 struct thread *thread;
5cf9c84e 1654 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1655 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1656 struct thread_trace *ttrace;
1657
1658 if (sc == NULL)
1659 return -1;
ba3d7dee 1660
8fb598e5 1661 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1662 ttrace = thread__trace(thread, trace->output);
2ae3a312 1663 if (ttrace == NULL)
b91fc39f 1664 goto out_put;
ba3d7dee 1665
bf2575c1
DA
1666 if (trace->summary)
1667 thread__update_stats(ttrace, id, sample);
1668
77170988 1669 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1670
fd0db102 1671 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1672 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1673 ttrace->filename.pending_open = false;
c522739d
ACM
1674 ++trace->stats.vfs_getname;
1675 }
1676
ae9ed035 1677 if (ttrace->entry_time) {
60c907ab 1678 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1679 if (trace__filter_duration(trace, duration))
1680 goto out;
fd2b2975 1681 duration_calculated = true;
ae9ed035
ACM
1682 } else if (trace->duration_filter)
1683 goto out;
60c907ab 1684
5cf9c84e
ACM
1685 if (sample->callchain) {
1686 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1687 if (callchain_ret == 0) {
1688 if (callchain_cursor.nr < trace->min_stack)
1689 goto out;
1690 callchain_ret = 1;
1691 }
1692 }
1693
fd2eabaf
DA
1694 if (trace->summary_only)
1695 goto out;
1696
fd2b2975 1697 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1698
1699 if (ttrace->entry_pending) {
c24ff998 1700 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1701 } else {
c24ff998
ACM
1702 fprintf(trace->output, " ... [");
1703 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1704 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1705 }
1706
da3c9a44
ACM
1707 if (sc->fmt == NULL) {
1708signed_print:
2c82c3ad 1709 fprintf(trace->output, ") = %ld", ret);
11c8e39f 1710 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
942a91ed 1711 char bf[STRERR_BUFSIZE];
c8b5f2c9 1712 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
ba3d7dee
ACM
1713 *e = audit_errno_to_name(-ret);
1714
c24ff998 1715 fprintf(trace->output, ") = -1 %s %s", e, emsg);
da3c9a44 1716 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1717 fprintf(trace->output, ") = 0 Timeout");
84486caa
ACM
1718 else if (ttrace->ret_scnprintf) {
1719 char bf[1024];
7ee57434
ACM
1720 struct syscall_arg arg = {
1721 .val = ret,
1722 .thread = thread,
1723 .trace = trace,
1724 };
1725 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
84486caa
ACM
1726 ttrace->ret_scnprintf = NULL;
1727 fprintf(trace->output, ") = %s", bf);
1728 } else if (sc->fmt->hexret)
2c82c3ad 1729 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1730 else if (sc->fmt->errpid) {
1731 struct thread *child = machine__find_thread(trace->host, ret, ret);
1732
1733 if (child != NULL) {
1734 fprintf(trace->output, ") = %ld", ret);
1735 if (child->comm_set)
1736 fprintf(trace->output, " (%s)", thread__comm_str(child));
1737 thread__put(child);
1738 }
1739 } else
da3c9a44 1740 goto signed_print;
ba3d7dee 1741
c24ff998 1742 fputc('\n', trace->output);
566a0885 1743
5cf9c84e
ACM
1744 if (callchain_ret > 0)
1745 trace__fprintf_callchain(trace, sample);
1746 else if (callchain_ret < 0)
1747 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1748out:
752fde44 1749 ttrace->entry_pending = false;
b91fc39f
ACM
1750 err = 0;
1751out_put:
1752 thread__put(thread);
1753 return err;
ba3d7dee
ACM
1754}
1755
c522739d 1756static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1757 union perf_event *event __maybe_unused,
c522739d
ACM
1758 struct perf_sample *sample)
1759{
f994592d
ACM
1760 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1761 struct thread_trace *ttrace;
1762 size_t filename_len, entry_str_len, to_move;
1763 ssize_t remaining_space;
1764 char *pos;
7f4f8001 1765 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1766
1767 if (!thread)
1768 goto out;
1769
1770 ttrace = thread__priv(thread);
1771 if (!ttrace)
ef65e96e 1772 goto out_put;
f994592d 1773
7f4f8001 1774 filename_len = strlen(filename);
39f0e7a8 1775 if (filename_len == 0)
ef65e96e 1776 goto out_put;
7f4f8001
ACM
1777
1778 if (ttrace->filename.namelen < filename_len) {
1779 char *f = realloc(ttrace->filename.name, filename_len + 1);
1780
1781 if (f == NULL)
ef65e96e 1782 goto out_put;
7f4f8001
ACM
1783
1784 ttrace->filename.namelen = filename_len;
1785 ttrace->filename.name = f;
1786 }
1787
1788 strcpy(ttrace->filename.name, filename);
1789 ttrace->filename.pending_open = true;
1790
f994592d 1791 if (!ttrace->filename.ptr)
ef65e96e 1792 goto out_put;
f994592d
ACM
1793
1794 entry_str_len = strlen(ttrace->entry_str);
1795 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1796 if (remaining_space <= 0)
ef65e96e 1797 goto out_put;
f994592d 1798
f994592d
ACM
1799 if (filename_len > (size_t)remaining_space) {
1800 filename += filename_len - remaining_space;
1801 filename_len = remaining_space;
1802 }
1803
1804 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1805 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1806 memmove(pos + filename_len, pos, to_move);
1807 memcpy(pos, filename, filename_len);
1808
1809 ttrace->filename.ptr = 0;
1810 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1811out_put:
1812 thread__put(thread);
f994592d 1813out:
c522739d
ACM
1814 return 0;
1815}
1816
1302d88e 1817static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1818 union perf_event *event __maybe_unused,
1302d88e
ACM
1819 struct perf_sample *sample)
1820{
1821 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1822 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1823 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1824 sample->pid,
1825 sample->tid);
c24ff998 1826 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1827
1828 if (ttrace == NULL)
1829 goto out_dump;
1830
1831 ttrace->runtime_ms += runtime_ms;
1832 trace->runtime_ms += runtime_ms;
ef65e96e 1833out_put:
b91fc39f 1834 thread__put(thread);
1302d88e
ACM
1835 return 0;
1836
1837out_dump:
c24ff998 1838 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1839 evsel->name,
1840 perf_evsel__strval(evsel, sample, "comm"),
1841 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1842 runtime,
1843 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 1844 goto out_put;
1302d88e
ACM
1845}
1846
1d6c9407
WN
1847static void bpf_output__printer(enum binary_printer_ops op,
1848 unsigned int val, void *extra)
1849{
1850 FILE *output = extra;
1851 unsigned char ch = (unsigned char)val;
1852
1853 switch (op) {
1854 case BINARY_PRINT_CHAR_DATA:
1855 fprintf(output, "%c", isprint(ch) ? ch : '.');
1856 break;
1857 case BINARY_PRINT_DATA_BEGIN:
1858 case BINARY_PRINT_LINE_BEGIN:
1859 case BINARY_PRINT_ADDR:
1860 case BINARY_PRINT_NUM_DATA:
1861 case BINARY_PRINT_NUM_PAD:
1862 case BINARY_PRINT_SEP:
1863 case BINARY_PRINT_CHAR_PAD:
1864 case BINARY_PRINT_LINE_END:
1865 case BINARY_PRINT_DATA_END:
1866 default:
1867 break;
1868 }
1869}
1870
1871static void bpf_output__fprintf(struct trace *trace,
1872 struct perf_sample *sample)
1873{
1874 print_binary(sample->raw_data, sample->raw_size, 8,
1875 bpf_output__printer, trace->output);
1876}
1877
14a052df
ACM
1878static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1879 union perf_event *event __maybe_unused,
1880 struct perf_sample *sample)
1881{
7ad35615
ACM
1882 int callchain_ret = 0;
1883
1884 if (sample->callchain) {
1885 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1886 if (callchain_ret == 0) {
1887 if (callchain_cursor.nr < trace->min_stack)
1888 goto out;
1889 callchain_ret = 1;
1890 }
1891 }
1892
14a052df
ACM
1893 trace__printf_interrupted_entry(trace, sample);
1894 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1895
1896 if (trace->trace_syscalls)
1897 fprintf(trace->output, "( ): ");
1898
1899 fprintf(trace->output, "%s:", evsel->name);
14a052df 1900
1d6c9407
WN
1901 if (perf_evsel__is_bpf_output(evsel)) {
1902 bpf_output__fprintf(trace, sample);
1903 } else if (evsel->tp_format) {
14a052df
ACM
1904 event_format__fprintf(evsel->tp_format, sample->cpu,
1905 sample->raw_data, sample->raw_size,
1906 trace->output);
1907 }
1908
1909 fprintf(trace->output, ")\n");
202ff968 1910
7ad35615
ACM
1911 if (callchain_ret > 0)
1912 trace__fprintf_callchain(trace, sample);
1913 else if (callchain_ret < 0)
1914 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1915out:
14a052df
ACM
1916 return 0;
1917}
1918
598d02c5
SF
1919static void print_location(FILE *f, struct perf_sample *sample,
1920 struct addr_location *al,
1921 bool print_dso, bool print_sym)
1922{
1923
bb963e16 1924 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
1925 fprintf(f, "%s@", al->map->dso->long_name);
1926
bb963e16 1927 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 1928 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1929 al->addr - al->sym->start);
1930 else if (al->map)
4414a3c5 1931 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1932 else
4414a3c5 1933 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1934}
1935
1936static int trace__pgfault(struct trace *trace,
1937 struct perf_evsel *evsel,
473398a2 1938 union perf_event *event __maybe_unused,
598d02c5
SF
1939 struct perf_sample *sample)
1940{
1941 struct thread *thread;
598d02c5
SF
1942 struct addr_location al;
1943 char map_type = 'd';
a2ea67d7 1944 struct thread_trace *ttrace;
b91fc39f 1945 int err = -1;
1df54290 1946 int callchain_ret = 0;
598d02c5
SF
1947
1948 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1949
1950 if (sample->callchain) {
1951 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1952 if (callchain_ret == 0) {
1953 if (callchain_cursor.nr < trace->min_stack)
1954 goto out_put;
1955 callchain_ret = 1;
1956 }
1957 }
1958
a2ea67d7
SF
1959 ttrace = thread__trace(thread, trace->output);
1960 if (ttrace == NULL)
b91fc39f 1961 goto out_put;
a2ea67d7
SF
1962
1963 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1964 ttrace->pfmaj++;
1965 else
1966 ttrace->pfmin++;
1967
1968 if (trace->summary_only)
b91fc39f 1969 goto out;
598d02c5 1970
473398a2 1971 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1972 sample->ip, &al);
1973
fd2b2975 1974 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
1975
1976 fprintf(trace->output, "%sfault [",
1977 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1978 "maj" : "min");
1979
1980 print_location(trace->output, sample, &al, false, true);
1981
1982 fprintf(trace->output, "] => ");
1983
473398a2 1984 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
1985 sample->addr, &al);
1986
1987 if (!al.map) {
473398a2 1988 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
1989 MAP__FUNCTION, sample->addr, &al);
1990
1991 if (al.map)
1992 map_type = 'x';
1993 else
1994 map_type = '?';
1995 }
1996
1997 print_location(trace->output, sample, &al, true, false);
1998
1999 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 2000
1df54290
ACM
2001 if (callchain_ret > 0)
2002 trace__fprintf_callchain(trace, sample);
2003 else if (callchain_ret < 0)
2004 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
2005out:
2006 err = 0;
2007out_put:
2008 thread__put(thread);
2009 return err;
598d02c5
SF
2010}
2011
e6001980 2012static void trace__set_base_time(struct trace *trace,
8a07a809 2013 struct perf_evsel *evsel,
e6001980
ACM
2014 struct perf_sample *sample)
2015{
8a07a809
ACM
2016 /*
2017 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2018 * and don't use sample->time unconditionally, we may end up having
2019 * some other event in the future without PERF_SAMPLE_TIME for good
2020 * reason, i.e. we may not be interested in its timestamps, just in
2021 * it taking place, picking some piece of information when it
2022 * appears in our event stream (vfs_getname comes to mind).
2023 */
2024 if (trace->base_time == 0 && !trace->full_time &&
2025 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
2026 trace->base_time = sample->time;
2027}
2028
6810fc91 2029static int trace__process_sample(struct perf_tool *tool,
0c82adcf 2030 union perf_event *event,
6810fc91
DA
2031 struct perf_sample *sample,
2032 struct perf_evsel *evsel,
2033 struct machine *machine __maybe_unused)
2034{
2035 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 2036 struct thread *thread;
6810fc91
DA
2037 int err = 0;
2038
744a9719 2039 tracepoint_handler handler = evsel->handler;
6810fc91 2040
aa07df6e
DA
2041 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2042 if (thread && thread__is_filtered(thread))
ef65e96e 2043 goto out;
bdc89661 2044
e6001980 2045 trace__set_base_time(trace, evsel, sample);
6810fc91 2046
3160565f
DA
2047 if (handler) {
2048 ++trace->nr_events;
0c82adcf 2049 handler(trace, evsel, event, sample);
3160565f 2050 }
ef65e96e
ACM
2051out:
2052 thread__put(thread);
6810fc91
DA
2053 return err;
2054}
2055
1e28fe0a 2056static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
2057{
2058 unsigned int rec_argc, i, j;
2059 const char **rec_argv;
2060 const char * const record_args[] = {
2061 "record",
2062 "-R",
2063 "-m", "1024",
2064 "-c", "1",
5e2485b1
DA
2065 };
2066
1e28fe0a
SF
2067 const char * const sc_args[] = { "-e", };
2068 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2069 const char * const majpf_args[] = { "-e", "major-faults" };
2070 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2071 const char * const minpf_args[] = { "-e", "minor-faults" };
2072 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2073
9aca7f17 2074 /* +1 is for the event string below */
1e28fe0a
SF
2075 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2076 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
2077 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2078
2079 if (rec_argv == NULL)
2080 return -ENOMEM;
2081
1e28fe0a 2082 j = 0;
5e2485b1 2083 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
2084 rec_argv[j++] = record_args[i];
2085
e281a960
SF
2086 if (trace->trace_syscalls) {
2087 for (i = 0; i < sc_args_nr; i++)
2088 rec_argv[j++] = sc_args[i];
2089
2090 /* event string may be different for older kernels - e.g., RHEL6 */
2091 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2092 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2093 else if (is_valid_tracepoint("syscalls:sys_enter"))
2094 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2095 else {
2096 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2097 return -1;
2098 }
9aca7f17 2099 }
9aca7f17 2100
1e28fe0a
SF
2101 if (trace->trace_pgfaults & TRACE_PFMAJ)
2102 for (i = 0; i < majpf_args_nr; i++)
2103 rec_argv[j++] = majpf_args[i];
2104
2105 if (trace->trace_pgfaults & TRACE_PFMIN)
2106 for (i = 0; i < minpf_args_nr; i++)
2107 rec_argv[j++] = minpf_args[i];
2108
2109 for (i = 0; i < (unsigned int)argc; i++)
2110 rec_argv[j++] = argv[i];
5e2485b1 2111
b0ad8ea6 2112 return cmd_record(j, rec_argv);
5e2485b1
DA
2113}
2114
bf2575c1
DA
2115static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2116
08c98776 2117static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2118{
ef503831 2119 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2120
2121 if (IS_ERR(evsel))
08c98776 2122 return false;
c522739d
ACM
2123
2124 if (perf_evsel__field(evsel, "pathname") == NULL) {
2125 perf_evsel__delete(evsel);
08c98776 2126 return false;
c522739d
ACM
2127 }
2128
744a9719 2129 evsel->handler = trace__vfs_getname;
c522739d 2130 perf_evlist__add(evlist, evsel);
08c98776 2131 return true;
c522739d
ACM
2132}
2133
0ae537cb 2134static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2135{
2136 struct perf_evsel *evsel;
2137 struct perf_event_attr attr = {
2138 .type = PERF_TYPE_SOFTWARE,
2139 .mmap_data = 1,
598d02c5
SF
2140 };
2141
2142 attr.config = config;
0524798c 2143 attr.sample_period = 1;
598d02c5
SF
2144
2145 event_attr_init(&attr);
2146
2147 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2148 if (evsel)
2149 evsel->handler = trace__pgfault;
598d02c5 2150
0ae537cb 2151 return evsel;
598d02c5
SF
2152}
2153
ddbb1b13
ACM
2154static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2155{
2156 const u32 type = event->header.type;
2157 struct perf_evsel *evsel;
2158
ddbb1b13
ACM
2159 if (type != PERF_RECORD_SAMPLE) {
2160 trace__process_event(trace, trace->host, event, sample);
2161 return;
2162 }
2163
2164 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2165 if (evsel == NULL) {
2166 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2167 return;
2168 }
2169
e6001980
ACM
2170 trace__set_base_time(trace, evsel, sample);
2171
ddbb1b13
ACM
2172 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2173 sample->raw_data == NULL) {
2174 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2175 perf_evsel__name(evsel), sample->tid,
2176 sample->cpu, sample->raw_size);
2177 } else {
2178 tracepoint_handler handler = evsel->handler;
2179 handler(trace, evsel, event, sample);
2180 }
2181}
2182
c27366f0
ACM
2183static int trace__add_syscall_newtp(struct trace *trace)
2184{
2185 int ret = -1;
2186 struct perf_evlist *evlist = trace->evlist;
2187 struct perf_evsel *sys_enter, *sys_exit;
2188
2189 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2190 if (sys_enter == NULL)
2191 goto out;
2192
2193 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2194 goto out_delete_sys_enter;
2195
2196 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2197 if (sys_exit == NULL)
2198 goto out_delete_sys_enter;
2199
2200 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2201 goto out_delete_sys_exit;
2202
2203 perf_evlist__add(evlist, sys_enter);
2204 perf_evlist__add(evlist, sys_exit);
2205
2ddd5c04 2206 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2207 /*
2208 * We're interested only in the user space callchain
2209 * leading to the syscall, allow overriding that for
2210 * debugging reasons using --kernel_syscall_callchains
2211 */
2212 sys_exit->attr.exclude_callchain_kernel = 1;
2213 }
2214
8b3ce757
ACM
2215 trace->syscalls.events.sys_enter = sys_enter;
2216 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2217
2218 ret = 0;
2219out:
2220 return ret;
2221
2222out_delete_sys_exit:
2223 perf_evsel__delete_priv(sys_exit);
2224out_delete_sys_enter:
2225 perf_evsel__delete_priv(sys_enter);
2226 goto out;
2227}
2228
19867b61
ACM
2229static int trace__set_ev_qualifier_filter(struct trace *trace)
2230{
2231 int err = -1;
b15d0a4c 2232 struct perf_evsel *sys_exit;
19867b61
ACM
2233 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2234 trace->ev_qualifier_ids.nr,
2235 trace->ev_qualifier_ids.entries);
2236
2237 if (filter == NULL)
2238 goto out_enomem;
2239
3541c034
MP
2240 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2241 filter)) {
b15d0a4c 2242 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2243 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2244 }
19867b61
ACM
2245
2246 free(filter);
2247out:
2248 return err;
2249out_enomem:
2250 errno = ENOMEM;
2251 goto out;
2252}
c27366f0 2253
/*
 * Live tracing mode.
 *
 * Adds the requested events to trace->evlist (syscall enter/exit plus
 * vfs_getname, major/minor page faults, sched_stat_runtime), creates the
 * target maps, optionally prepares the workload given in argv, opens the
 * events, applies the pid and syscall-id filters, mmaps the ring buffers and
 * then loops reading events and passing them to trace__handle_event() until
 * the loop is interrupted or has drained.
 *
 * There is a single return, after the out_delete_evlist label: it returns
 * err after deleting the evlist and clearing trace->evlist and trace->live.
 * All error labels print a message to trace->output and jump back there.
 */
f15eb531 2254static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2255{
14a052df 2256 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2257 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2258 int err = -1, i;
2259 unsigned long before;
f15eb531 2260 const bool forks = argc > 0;
46fb3c21 2261 bool draining = false;
514f1c67 2262
75b757ca
ACM
2263 trace->live = true;
2264
c27366f0 2265 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2266 goto out_error_raw_syscalls;
514f1c67 2267
e281a960 2268 if (trace->trace_syscalls)
08c98776 2269 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2270
0ae537cb
ACM
2271 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2272 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2273 if (pgfault_maj == NULL)
2274 goto out_error_mem;
2275 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2276 }
598d02c5 2277
0ae537cb
ACM
2278 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2279 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2280 if (pgfault_min == NULL)
2281 goto out_error_mem;
2282 perf_evlist__add(evlist, pgfault_min);
2283 }
598d02c5 2284
1302d88e 2285 if (trace->sched &&
2cc990ba
ACM
2286 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2287 trace__sched_stat_runtime))
2288 goto out_error_sched_stat_runtime;
1302d88e 2289
514f1c67
ACM
2290 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2291 if (err < 0) {
c24ff998 2292 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2293 goto out_delete_evlist;
2294 }
2295
752fde44
ACM
2296 err = trace__symbols_init(trace, evlist);
2297 if (err < 0) {
c24ff998 2298 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2299 goto out_delete_evlist;
752fde44
ACM
2300 }
2301
fde54b78
ACM
2302 perf_evlist__config(evlist, &trace->opts, NULL);
2303
/* Callchains are configured only on sys_exit and the page fault events. */
0c3a6ef4
ACM
2304 if (callchain_param.enabled) {
2305 bool use_identifier = false;
2306
2307 if (trace->syscalls.events.sys_exit) {
2308 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2309 &trace->opts, &callchain_param);
2310 use_identifier = true;
2311 }
2312
2313 if (pgfault_maj) {
2314 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2315 use_identifier = true;
2316 }
2317
2318 if (pgfault_min) {
2319 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2320 use_identifier = true;
2321 }
2322
2323 if (use_identifier) {
2324 /*
2325 * Now we have evsels with different sample_ids, use
2326 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2327 * from a fixed position in each ring buffer record.
2328 *
2329 * As of this the changeset introducing this comment, this
2330 * isn't strictly needed, as the fields that can come before
2331 * PERF_SAMPLE_ID are all used, but we'll probably disable
2332 * some of those for things like copying the payload of
2333 * pointer syscall arguments, and for vfs_getname we don't
2334 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2335 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2336 */
2337 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2338 perf_evlist__reset_sample_bit(evlist, ID);
2339 }
fde54b78 2340 }
514f1c67 2341
f15eb531
NK
2342 signal(SIGCHLD, sig_handler);
2343 signal(SIGINT, sig_handler);
2344
2345 if (forks) {
6ef73ec4 2346 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2347 argv, false, NULL);
f15eb531 2348 if (err < 0) {
c24ff998 2349 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2350 goto out_delete_evlist;
f15eb531
NK
2351 }
2352 }
2353
514f1c67 2354 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2355 if (err < 0)
2356 goto out_error_open;
514f1c67 2357
ba504235
WN
2358 err = bpf__apply_obj_config();
2359 if (err) {
2360 char errbuf[BUFSIZ];
2361
2362 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2363 pr_err("ERROR: Apply config to BPF failed: %s\n",
2364 errbuf);
2365 goto out_error_open;
2366 }
2367
241b057c
ACM
2368 /*
2369 * Better not use !target__has_task() here because we need to cover the
2370 * case where no threads were specified in the command line, but a
2371 * workload was, and in that case we will fill in the thread_map when
2372 * we fork the workload in perf_evlist__prepare_workload.
2373 */
f078c385
ACM
2374 if (trace->filter_pids.nr > 0)
2375 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2376 else if (thread_map__pid(evlist->threads, 0) == -1)
f078c385
ACM
2377 err = perf_evlist__set_filter_pid(evlist, getpid());
2378
94ad89bc
ACM
2379 if (err < 0)
2380 goto out_error_mem;
2381
19867b61
ACM
2382 if (trace->ev_qualifier_ids.nr > 0) {
2383 err = trace__set_ev_qualifier_filter(trace);
2384 if (err < 0)
2385 goto out_errno;
19867b61 2386
2e5e5f87
ACM
2387 pr_debug("event qualifier tracepoint filter: %s\n",
2388 trace->syscalls.events.sys_exit->filter);
2389 }
19867b61 2390
94ad89bc
ACM
2391 err = perf_evlist__apply_filters(evlist, &evsel);
2392 if (err < 0)
2393 goto out_error_apply_filters;
241b057c 2394
f885037e 2395 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
e09b18d4
ACM
2396 if (err < 0)
2397 goto out_error_mmap;
514f1c67 2398
e36b7821 2399 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2400 perf_evlist__enable(evlist);
2401
f15eb531
NK
2402 if (forks)
2403 perf_evlist__start_workload(evlist);
2404
/* initial_delay is multiplied by 1000 for usleep(), i.e. it is in msecs. */
e36b7821
AB
2405 if (trace->opts.initial_delay) {
2406 usleep(trace->opts.initial_delay * 1000);
2407 perf_evlist__enable(evlist);
2408 }
2409
e13798c7 2410 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2411 evlist->threads->nr > 1 ||
2412 perf_evlist__first(evlist)->attr.inherit;
/*
 * One pass over every mmap; "before" lets the code after the loop tell
 * whether this pass consumed any event.
 */
514f1c67 2413again:
efd5745e 2414 before = trace->nr_events;
514f1c67
ACM
2415
2416 for (i = 0; i < evlist->nr_mmaps; i++) {
2417 union perf_event *event;
2418
2419 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2420 struct perf_sample sample;
514f1c67 2421
efd5745e 2422 ++trace->nr_events;
514f1c67 2423
514f1c67
ACM
2424 err = perf_evlist__parse_sample(evlist, event, &sample);
2425 if (err) {
c24ff998 2426 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2427 goto next_event;
514f1c67
ACM
2428 }
2429
ddbb1b13 2430 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2431next_event:
2432 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2433
ba209f85
ACM
2434 if (interrupted)
2435 goto out_disable;
02ac5421
ACM
2436
2437 if (done && !draining) {
2438 perf_evlist__disable(evlist);
2439 draining = true;
2440 }
514f1c67
ACM
2441 }
2442 }
2443
/*
 * Nothing read in this pass: poll (100ms timeout once "done" is set,
 * otherwise blocking) unless already draining; otherwise go around again.
 */
efd5745e 2444 if (trace->nr_events == before) {
ba209f85 2445 int timeout = done ? 100 : -1;
f15eb531 2446
46fb3c21
ACM
2447 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2448 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2449 draining = true;
2450
ba209f85 2451 goto again;
46fb3c21 2452 }
ba209f85
ACM
2453 } else {
2454 goto again;
f15eb531
NK
2455 }
2456
ba209f85 2457out_disable:
f3b623b8
ACM
2458 thread__zput(trace->current);
2459
ba209f85 2460 perf_evlist__disable(evlist);
514f1c67 2461
c522739d
ACM
2462 if (!err) {
2463 if (trace->summary)
2464 trace__fprintf_thread_summary(trace, trace->output);
2465
2466 if (trace->show_tool_stats) {
2467 fprintf(trace->output, "Stats:\n "
2468 " vfs_getname : %" PRIu64 "\n"
2469 " proc_getname: %" PRIu64 "\n",
2470 trace->stats.vfs_getname,
2471 trace->stats.proc_getname);
2472 }
2473 }
bf2575c1 2474
514f1c67
ACM
2475out_delete_evlist:
2476 perf_evlist__delete(evlist);
14a052df 2477 trace->evlist = NULL;
75b757ca 2478 trace->live = false;
514f1c67 2479 return err;
/*
 * Everything below follows the return above, so it is only reachable through
 * goto.  The braces exist to scope errbuf for the labels that format a
 * message into it.
 */
6ef068cb
ACM
2480{
2481 char errbuf[BUFSIZ];
a8f23d8f 2482
2cc990ba 2483out_error_sched_stat_runtime:
988bdb31 2484 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2485 goto out_error;
2486
801c67b0 2487out_error_raw_syscalls:
988bdb31 2488 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2489 goto out_error;
2490
e09b18d4
ACM
2491out_error_mmap:
2492 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2493 goto out_error;
2494
a8f23d8f
ACM
2495out_error_open:
2496 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2497
2498out_error:
6ef068cb 2499 fprintf(trace->output, "%s\n", errbuf);
87f91868 2500 goto out_delete_evlist;
94ad89bc
ACM
2501
2502out_error_apply_filters:
2503 fprintf(trace->output,
2504 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2505 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2506 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2507 goto out_delete_evlist;
514f1c67 2508}
5ed08dae
ACM
2509out_error_mem:
2510 fprintf(trace->output, "Not enough memory to run!\n");
2511 goto out_delete_evlist;
19867b61
ACM
2512
2513out_errno:
2514 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2515 goto out_delete_evlist;
a8f23d8f 2516}
514f1c67 2517
6810fc91
DA
2518static int trace__replay(struct trace *trace)
2519{
2520 const struct perf_evsel_str_handler handlers[] = {
c522739d 2521 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2522 };
f5fc1412
JO
2523 struct perf_data_file file = {
2524 .path = input_name,
2525 .mode = PERF_DATA_MODE_READ,
e366a6d8 2526 .force = trace->force,
f5fc1412 2527 };
6810fc91 2528 struct perf_session *session;
003824e8 2529 struct perf_evsel *evsel;
6810fc91
DA
2530 int err = -1;
2531
2532 trace->tool.sample = trace__process_sample;
2533 trace->tool.mmap = perf_event__process_mmap;
384c671e 2534 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2535 trace->tool.comm = perf_event__process_comm;
2536 trace->tool.exit = perf_event__process_exit;
2537 trace->tool.fork = perf_event__process_fork;
2538 trace->tool.attr = perf_event__process_attr;
f3b3614a 2539 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2540 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2541 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2542
0a8cb85c 2543 trace->tool.ordered_events = true;
6810fc91
DA
2544 trace->tool.ordering_requires_timestamps = true;
2545
2546 /* add tid to output */
2547 trace->multiple_threads = true;
2548
f5fc1412 2549 session = perf_session__new(&file, false, &trace->tool);
6810fc91 2550 if (session == NULL)
52e02834 2551 return -1;
6810fc91 2552
aa07df6e
DA
2553 if (trace->opts.target.pid)
2554 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2555
2556 if (trace->opts.target.tid)
2557 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2558
0a7e6d1b 2559 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2560 goto out;
2561
8fb598e5
DA
2562 trace->host = &session->machines.host;
2563
6810fc91
DA
2564 err = perf_session__set_tracepoints_handlers(session, handlers);
2565 if (err)
2566 goto out;
2567
003824e8
NK
2568 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2569 "raw_syscalls:sys_enter");
9aca7f17
DA
2570 /* older kernels have syscalls tp versus raw_syscalls */
2571 if (evsel == NULL)
2572 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2573 "syscalls:sys_enter");
003824e8 2574
e281a960
SF
2575 if (evsel &&
2576 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2577 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2578 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2579 goto out;
2580 }
2581
2582 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2583 "raw_syscalls:sys_exit");
9aca7f17
DA
2584 if (evsel == NULL)
2585 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2586 "syscalls:sys_exit");
e281a960
SF
2587 if (evsel &&
2588 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2589 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2590 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2591 goto out;
2592 }
2593
e5cadb93 2594 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2595 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2596 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2597 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2598 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2599 evsel->handler = trace__pgfault;
2600 }
2601
6810fc91
DA
2602 setup_pager();
2603
b7b61cbe 2604 err = perf_session__process_events(session);
6810fc91
DA
2605 if (err)
2606 pr_err("Failed to process events, error %d", err);
2607
bf2575c1
DA
2608 else if (trace->summary)
2609 trace__fprintf_thread_summary(trace, trace->output);
2610
6810fc91
DA
2611out:
2612 perf_session__delete(session);
2613
2614 return err;
2615}
2616
1302d88e
ACM
/*
 * Print the heading of the per-thread summary to @fp and return what
 * fprintf() reported.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2625
b535d523
ACM
2626DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2627 struct stats *stats;
2628 double msecs;
2629 int syscall;
2630)
2631{
2632 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2633 struct stats *stats = source->priv;
2634
2635 entry->syscall = source->i;
2636 entry->stats = stats;
2637 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2638}
2639
bf2575c1
DA
2640static size_t thread__dump_stats(struct thread_trace *ttrace,
2641 struct trace *trace, FILE *fp)
2642{
bf2575c1
DA
2643 size_t printed = 0;
2644 struct syscall *sc;
b535d523
ACM
2645 struct rb_node *nd;
2646 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2647
b535d523 2648 if (syscall_stats == NULL)
bf2575c1
DA
2649 return 0;
2650
2651 printed += fprintf(fp, "\n");
2652
834fd46d
MW
2653 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2654 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2655 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2656
98a91837 2657 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2658 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2659 if (stats) {
2660 double min = (double)(stats->min) / NSEC_PER_MSEC;
2661 double max = (double)(stats->max) / NSEC_PER_MSEC;
2662 double avg = avg_stats(stats);
2663 double pct;
2664 u64 n = (u64) stats->n;
2665
2666 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2667 avg /= NSEC_PER_MSEC;
2668
b535d523 2669 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2670 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2671 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2672 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2673 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2674 }
bf2575c1
DA
2675 }
2676
b535d523 2677 resort_rb__delete(syscall_stats);
bf2575c1 2678 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2679
2680 return printed;
2681}
2682
96c14451 2683static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2684{
96c14451 2685 size_t printed = 0;
89dceb22 2686 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2687 double ratio;
2688
2689 if (ttrace == NULL)
2690 return 0;
2691
2692 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2693
15e65c69 2694 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2695 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2696 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2697 if (ttrace->pfmaj)
2698 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2699 if (ttrace->pfmin)
2700 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2701 if (trace->sched)
2702 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2703 else if (fputc('\n', fp) != EOF)
2704 ++printed;
2705
bf2575c1 2706 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2707
96c14451
ACM
2708 return printed;
2709}
896cbb56 2710
96c14451
ACM
2711static unsigned long thread__nr_events(struct thread_trace *ttrace)
2712{
2713 return ttrace ? ttrace->nr_events : 0;
2714}
2715
2716DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2717 struct thread *thread;
2718)
2719{
2720 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2721}
2722
1302d88e
ACM
2723static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2724{
96c14451
ACM
2725 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2726 size_t printed = trace__fprintf_threads_header(fp);
2727 struct rb_node *nd;
1302d88e 2728
96c14451
ACM
2729 if (threads == NULL) {
2730 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2731 return 0;
2732 }
2733
98a91837 2734 resort_rb__for_each_entry(nd, threads)
96c14451 2735 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
896cbb56 2736
96c14451
ACM
2737 resort_rb__delete(threads);
2738
2739 return printed;
1302d88e
ACM
2740}
2741
ae9ed035
ACM
2742static int trace__set_duration(const struct option *opt, const char *str,
2743 int unset __maybe_unused)
2744{
2745 struct trace *trace = opt->value;
2746
2747 trace->duration_filter = atof(str);
2748 return 0;
2749}
2750
f078c385
ACM
2751static int trace__set_filter_pids(const struct option *opt, const char *str,
2752 int unset __maybe_unused)
2753{
2754 int ret = -1;
2755 size_t i;
2756 struct trace *trace = opt->value;
2757 /*
2758 * FIXME: introduce a intarray class, plain parse csv and create a
2759 * { int nr, int entries[] } struct...
2760 */
2761 struct intlist *list = intlist__new(str);
2762
2763 if (list == NULL)
2764 return -1;
2765
2766 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2767 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2768
2769 if (trace->filter_pids.entries == NULL)
2770 goto out;
2771
2772 trace->filter_pids.entries[0] = getpid();
2773
2774 for (i = 1; i < trace->filter_pids.nr; ++i)
2775 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2776
2777 intlist__delete(list);
2778 ret = 0;
2779out:
2780 return ret;
2781}
2782
c24ff998
ACM
2783static int trace__open_output(struct trace *trace, const char *filename)
2784{
2785 struct stat st;
2786
2787 if (!stat(filename, &st) && st.st_size) {
2788 char oldname[PATH_MAX];
2789
2790 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2791 unlink(oldname);
2792 rename(filename, oldname);
2793 }
2794
2795 trace->output = fopen(filename, "w");
2796
2797 return trace->output == NULL ? -errno : 0;
2798}
2799
598d02c5
SF
2800static int parse_pagefaults(const struct option *opt, const char *str,
2801 int unset __maybe_unused)
2802{
2803 int *trace_pgfaults = opt->value;
2804
2805 if (strcmp(str, "all") == 0)
2806 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2807 else if (strcmp(str, "maj") == 0)
2808 *trace_pgfaults |= TRACE_PFMAJ;
2809 else if (strcmp(str, "min") == 0)
2810 *trace_pgfaults |= TRACE_PFMIN;
2811 else
2812 return -1;
2813
2814 return 0;
2815}
2816
14a052df
ACM
2817static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2818{
2819 struct perf_evsel *evsel;
2820
e5cadb93 2821 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2822 evsel->handler = handler;
2823}
2824
017037ff
ACM
2825/*
2826 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2827 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2828 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2829 *
2830 * It'd be better to introduce a parse_options() variant that would return a
2831 * list with the terms it didn't match to an event...
2832 */
2833static int trace__parse_events_option(const struct option *opt, const char *str,
2834 int unset __maybe_unused)
2835{
2836 struct trace *trace = (struct trace *)opt->value;
2837 const char *s = str;
2838 char *sep = NULL, *lists[2] = { NULL, NULL, };
2839 int len = strlen(str), err = -1, list;
2840 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2841 char group_name[PATH_MAX];
2842
2843 if (strace_groups_dir == NULL)
2844 return -1;
2845
2846 if (*s == '!') {
2847 ++s;
2848 trace->not_ev_qualifier = true;
2849 }
2850
2851 while (1) {
2852 if ((sep = strchr(s, ',')) != NULL)
2853 *sep = '\0';
2854
2855 list = 0;
2856 if (syscalltbl__id(trace->sctbl, s) >= 0) {
2857 list = 1;
2858 } else {
2859 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2860 if (access(group_name, R_OK) == 0)
2861 list = 1;
2862 }
2863
2864 if (lists[list]) {
2865 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2866 } else {
2867 lists[list] = malloc(len);
2868 if (lists[list] == NULL)
2869 goto out;
2870 strcpy(lists[list], s);
2871 }
2872
2873 if (!sep)
2874 break;
2875
2876 *sep = ',';
2877 s = sep + 1;
2878 }
2879
2880 if (lists[1] != NULL) {
2881 struct strlist_config slist_config = {
2882 .dirname = strace_groups_dir,
2883 };
2884
2885 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2886 if (trace->ev_qualifier == NULL) {
2887 fputs("Not enough memory to parse event qualifier", trace->output);
2888 goto out;
2889 }
2890
2891 if (trace__validate_ev_qualifier(trace))
2892 goto out;
2893 }
2894
2895 err = 0;
2896
2897 if (lists[0]) {
2898 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2899 "event selector. use 'perf list' to list available events",
2900 parse_events_option);
2901 err = parse_events_option(&o, lists[0], 0);
2902 }
2903out:
2904 if (sep)
2905 *sep = ',';
2906
2907 return err;
2908}
2909
/*
 * cmd_trace - entry point of the 'perf trace' builtin.
 *
 * Sets up the default struct trace and the option table, parses the command
 * line and then takes one of three routes: trace__record() when the first
 * remaining argument is "record", trace__replay() when input_name is set,
 * trace__run() otherwise.
 *
 * Returns the result of the chosen route, or a negative value when setup
 * fails before reaching it.
 */
b0ad8ea6 2910int cmd_trace(int argc, const char **argv)
514f1c67 2911{
6fdd9cb7 2912 const char *trace_usage[] = {
f15eb531
NK
2913 "perf trace [<options>] [<command>]",
2914 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2915 "perf trace record [<options>] [<command>]",
2916 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2917 NULL
2918 };
/*
 * Defaults. UINT_MAX in opts.mmap_pages and in max_stack doubles as a
 * "not set on the command line" marker that is checked after option parsing.
 * Output goes to stderr unless -o/--output replaces it further down.
 */
2919 struct trace trace = {
514f1c67
ACM
2920 .syscalls = {
2921 . max = -1,
2922 },
2923 .opts = {
2924 .target = {
2925 .uid = UINT_MAX,
2926 .uses_mmap = true,
2927 },
2928 .user_freq = UINT_MAX,
2929 .user_interval = ULLONG_MAX,
509051ea 2930 .no_buffering = true,
38d5447d 2931 .mmap_pages = UINT_MAX,
9d9cad76 2932 .proc_map_timeout = 500,
514f1c67 2933 },
007d66a0 2934 .output = stderr,
50c95cbd 2935 .show_comm = true,
e281a960 2936 .trace_syscalls = true,
44621819 2937 .kernel_syscallchains = false,
05614993 2938 .max_stack = UINT_MAX,
514f1c67 2939 };
c24ff998 2940 const char *output_name = NULL;
514f1c67 2941 const struct option trace_options[] = {
017037ff
ACM
2942 OPT_CALLBACK('e', "event", &trace, "event",
2943 "event/syscall selector. use 'perf list' to list available events",
2944 trace__parse_events_option),
50c95cbd
ACM
2945 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2946 "show the thread COMM next to its id"),
c522739d 2947 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
2948 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2949 trace__parse_events_option),
c24ff998 2950 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2951 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2952 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2953 "trace events on existing process id"),
ac9be8ee 2954 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2955 "trace events on existing thread id"),
fa0e4ffe
ACM
2956 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2957 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2958 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2959 "system-wide collection from all CPUs"),
ac9be8ee 2960 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2961 "list of cpus to monitor"),
6810fc91 2962 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2963 "child tasks do not inherit counters"),
994a1f78
JO
2964 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2965 "number of mmap data pages",
2966 perf_evlist__parse_mmap_pages),
ac9be8ee 2967 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2968 "user to profile"),
ae9ed035
ACM
2969 OPT_CALLBACK(0, "duration", &trace, "float",
2970 "show only events with duration > N.M ms",
2971 trace__set_duration),
1302d88e 2972 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 2973 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
2974 OPT_BOOLEAN('T', "time", &trace.full_time,
2975 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
2976 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2977 "Show only syscall summary with statistics"),
2978 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2979 "Show all syscalls and summary with statistics"),
598d02c5
SF
2980 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2981 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 2982 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 2983 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
2984 OPT_CALLBACK(0, "call-graph", &trace.opts,
2985 "record_mode[,record_size]", record_callchain_help,
2986 &record_parse_callchain_opt),
44621819
ACM
2987 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2988 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
2989 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2990 "Set the minimum stack depth when parsing the callchain, "
2991 "anything below the specified depth will be ignored."),
c6d4a494
ACM
2992 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2993 "Set the maximum stack depth when parsing the callchain, "
2994 "anything beyond the specified depth will be ignored. "
4cb93446 2995 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
2996 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2997 "per thread proc mmap processing timeout in ms"),
e36b7821
AB
2998 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2999 "ms to wait before starting measurement after program "
3000 "start"),
514f1c67
ACM
3001 OPT_END()
3002 };
/* Both flags start out true and are cleared below when the sentinel survived parsing. */
ccd62a89 3003 bool __maybe_unused max_stack_user_set = true;
f3e459d1 3004 bool mmap_pages_user_set = true;
6fdd9cb7 3005 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 3006 int err;
32caf0d1 3007 char bf[BUFSIZ];
514f1c67 3008
4d08cb80
ACM
3009 signal(SIGSEGV, sighandler_dump_stack);
3010 signal(SIGFPE, sighandler_dump_stack);
3011
/* The -e/--event/--expr callbacks use both of these, so create them before parsing. */
14a052df 3012 trace.evlist = perf_evlist__new();
fd0db102 3013 trace.sctbl = syscalltbl__new();
14a052df 3014
fd0db102 3015 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 3016 pr_err("Not enough memory to run!\n");
ff8f695c 3017 err = -ENOMEM;
14a052df
ACM
3018 goto out;
3019 }
3020
6fdd9cb7
YS
3021 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3022 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 3023
d7888573
WN
3024 err = bpf__setup_stdout(trace.evlist);
3025 if (err) {
3026 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3027 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
3028 goto out;
3029 }
3030
/* Generic failure code for the 'goto out' paths that do not set err themselves. */
59247e33
ACM
3031 err = -1;
3032
598d02c5
SF
3033 if (trace.trace_pgfaults) {
3034 trace.opts.sample_address = true;
3035 trace.opts.sample_time = true;
3036 }
3037
f3e459d1
ACM
3038 if (trace.opts.mmap_pages == UINT_MAX)
3039 mmap_pages_user_set = false;
3040
/* No --max-stack given: use PERF_MAX_STACK_DEPTH when reading a file, the sysctl value otherwise. */
05614993 3041 if (trace.max_stack == UINT_MAX) {
fe176085 3042 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
3043 max_stack_user_set = false;
3044 }
3045
/*
 * A stack depth limit was requested for syscall tracing but no callchain mode
 * is enabled yet: select "dwarf" (only when built with DWARF unwind support).
 */
3046#ifdef HAVE_DWARF_UNWIND_SUPPORT
caa36ed7 3047 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
3048 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
3049#endif
3050
2ddd5c04 3051 if (callchain_param.enabled) {
/* Effective uid 0 and no explicit --mmap-pages: use 4 times perf_event_mlock_kb_in_pages(). */
f3e459d1
ACM
3052 if (!mmap_pages_user_set && geteuid() == 0)
3053 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
3054
566a0885 3055 symbol_conf.use_callchain = true;
f3e459d1 3056 }
566a0885 3057
/* The evlist is not empty: route every evsel in it to trace__event_handler. */
14a052df
ACM
3058 if (trace.evlist->nr_entries > 0)
3059 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3060
/* "record" subcommand: hand over the remaining arguments, minus the subcommand itself. */
1e28fe0a
SF
3061 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3062 return trace__record(&trace, argc-1, &argv[1]);
3063
3064 /* summary_only implies summary option, but don't overwrite summary if set */
3065 if (trace.summary_only)
3066 trace.summary = trace.summary_only;
3067
726f3234
ACM
3068 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3069 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
3070 pr_err("Please specify something to trace.\n");
3071 return -1;
3072 }
3073
017037ff 3074 if (!trace.trace_syscalls && trace.ev_qualifier) {
59247e33
ACM
3075 pr_err("The -e option can't be used with --no-syscalls.\n");
3076 goto out;
3077 }
3078
c24ff998
ACM
3079 if (output_name != NULL) {
3080 err = trace__open_output(&trace, output_name);
3081 if (err < 0) {
3082 perror("failed to create output file");
3083 goto out;
3084 }
3085 }
3086
fd0db102
ACM
3087 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3088
602ad878 3089 err = target__validate(&trace.opts.target);
32caf0d1 3090 if (err) {
602ad878 3091 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3092 fprintf(trace.output, "%s", bf);
3093 goto out_close;
32caf0d1
NK
3094 }
3095
602ad878 3096 err = target__parse_uid(&trace.opts.target);
514f1c67 3097 if (err) {
602ad878 3098 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3099 fprintf(trace.output, "%s", bf);
3100 goto out_close;
514f1c67
ACM
3101 }
3102
/* No workload on the command line and target__none() is true: trace system wide. */
602ad878 3103 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3104 trace.opts.target.system_wide = true;
3105
6810fc91
DA
3106 if (input_name)
3107 err = trace__replay(&trace);
3108 else
3109 err = trace__run(&trace, argc, argv);
1302d88e 3110
/* Only close the stream when it was opened here for -o; the default is stderr. */
c24ff998
ACM
3111out_close:
3112 if (output_name != NULL)
3113 fclose(trace.output);
3114out:
1302d88e 3115 return err;
514f1c67 3116}