]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blame - tools/perf/builtin-trace.c
perf bpf: Add linux/socket.h to the headers accessible to bpf proggies
[mirror_ubuntu-focal-kernel.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
9ea42ba4 22#include "util/cgroup.h"
752fde44 23#include "util/color.h"
7c304ee0 24#include "util/debug.h"
092bd3cd 25#include "util/env.h"
5ab8c689 26#include "util/event.h"
514f1c67 27#include "util/evlist.h"
4b6ab94e 28#include <subcmd/exec-cmd.h>
752fde44 29#include "util/machine.h"
9a3993d4 30#include "util/path.h"
6810fc91 31#include "util/session.h"
752fde44 32#include "util/thread.h"
4b6ab94e 33#include <subcmd/parse-options.h>
2ae3a312 34#include "util/strlist.h"
bdc89661 35#include "util/intlist.h"
514f1c67 36#include "util/thread_map.h"
bf2575c1 37#include "util/stat.h"
fd5cead2 38#include "trace/beauty/beauty.h"
97978b3e 39#include "trace-event.h"
9aca7f17 40#include "util/parse-events.h"
ba504235 41#include "util/bpf-loader.h"
566a0885 42#include "callchain.h"
fea01392 43#include "print_binary.h"
a067558e 44#include "string2.h"
fd0db102 45#include "syscalltbl.h"
96c14451 46#include "rb_resort.h"
514f1c67 47
a43783ae 48#include <errno.h>
fd20e811 49#include <inttypes.h>
4208735d 50#include <poll.h>
9607ad3a 51#include <signal.h>
514f1c67 52#include <stdlib.h>
017037ff 53#include <string.h>
8dd2a131 54#include <linux/err.h>
997bba8c 55#include <linux/filter.h>
877a7a11 56#include <linux/kernel.h>
39878d49 57#include <linux/random.h>
c6d4a494 58#include <linux/stringify.h>
bd48c63e 59#include <linux/time64.h>
bafae98e 60#include <fcntl.h>
514f1c67 61
3d689ed6
ACM
62#include "sane_ctype.h"
63
c188e7ac
ACM
64#ifndef O_CLOEXEC
65# define O_CLOEXEC 02000000
66#endif
67
83a51694
ACM
68#ifndef F_LINUX_SPECIFIC_BASE
69# define F_LINUX_SPECIFIC_BASE 1024
70#endif
71
d1d438a3
ACM
72struct trace {
73 struct perf_tool tool;
fd0db102 74 struct syscalltbl *sctbl;
d1d438a3
ACM
75 struct {
76 int max;
77 struct syscall *table;
78 struct {
79 struct perf_evsel *sys_enter,
d3d1c4bd
ACM
80 *sys_exit,
81 *augmented;
d1d438a3
ACM
82 } events;
83 } syscalls;
84 struct record_opts opts;
85 struct perf_evlist *evlist;
86 struct machine *host;
87 struct thread *current;
9ea42ba4 88 struct cgroup *cgroup;
d1d438a3
ACM
89 u64 base_time;
90 FILE *output;
91 unsigned long nr_events;
92 struct strlist *ev_qualifier;
93 struct {
94 size_t nr;
95 int *entries;
96 } ev_qualifier_ids;
d1d438a3
ACM
97 struct {
98 size_t nr;
99 pid_t *entries;
100 } filter_pids;
101 double duration_filter;
102 double runtime_ms;
103 struct {
104 u64 vfs_getname,
105 proc_getname;
106 } stats;
c6d4a494 107 unsigned int max_stack;
5cf9c84e 108 unsigned int min_stack;
d1d438a3
ACM
109 bool not_ev_qualifier;
110 bool live;
111 bool full_time;
112 bool sched;
113 bool multiple_threads;
114 bool summary;
115 bool summary_only;
0a6545bd 116 bool failure_only;
d1d438a3 117 bool show_comm;
591421e1 118 bool print_sample;
d1d438a3
ACM
119 bool show_tool_stats;
120 bool trace_syscalls;
44621819 121 bool kernel_syscallchains;
d1d438a3
ACM
122 bool force;
123 bool vfs_getname;
124 int trace_pgfaults;
125};
a1c2552d 126
77170988
ACM
127struct tp_field {
128 int offset;
129 union {
130 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
131 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
132 };
133};
134
135#define TP_UINT_FIELD(bits) \
136static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
137{ \
55d43bca
DA
138 u##bits value; \
139 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
140 return value; \
77170988
ACM
141}
142
143TP_UINT_FIELD(8);
144TP_UINT_FIELD(16);
145TP_UINT_FIELD(32);
146TP_UINT_FIELD(64);
147
148#define TP_UINT_FIELD__SWAPPED(bits) \
149static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
150{ \
55d43bca
DA
151 u##bits value; \
152 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
153 return bswap_##bits(value);\
154}
155
156TP_UINT_FIELD__SWAPPED(16);
157TP_UINT_FIELD__SWAPPED(32);
158TP_UINT_FIELD__SWAPPED(64);
159
aa823f58 160static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
77170988 161{
aa823f58 162 field->offset = offset;
77170988 163
aa823f58 164 switch (size) {
77170988
ACM
165 case 1:
166 field->integer = tp_field__u8;
167 break;
168 case 2:
169 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
170 break;
171 case 4:
172 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
173 break;
174 case 8:
175 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
176 break;
177 default:
178 return -1;
179 }
180
181 return 0;
182}
183
aa823f58
ACM
184static int tp_field__init_uint(struct tp_field *field, struct format_field *format_field, bool needs_swap)
185{
186 return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
187}
188
77170988
ACM
189static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
190{
191 return sample->raw_data + field->offset;
192}
193
aa823f58 194static int __tp_field__init_ptr(struct tp_field *field, int offset)
77170988 195{
aa823f58 196 field->offset = offset;
77170988
ACM
197 field->pointer = tp_field__ptr;
198 return 0;
199}
200
aa823f58
ACM
201static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
202{
203 return __tp_field__init_ptr(field, format_field->offset);
204}
205
77170988
ACM
206struct syscall_tp {
207 struct tp_field id;
208 union {
209 struct tp_field args, ret;
210 };
211};
212
213static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
214 struct tp_field *field,
215 const char *name)
216{
217 struct format_field *format_field = perf_evsel__field(evsel, name);
218
219 if (format_field == NULL)
220 return -1;
221
222 return tp_field__init_uint(field, format_field, evsel->needs_swap);
223}
224
/*
 * Initialize member 'name' of the struct syscall_tp stored in evsel->priv as
 * an unsigned integer reader for the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name)			\
({									\
	struct syscall_tp *sc = evsel->priv;				\
	perf_evsel__init_tp_uint_field(evsel, &sc->name, #name);	\
})
228
/*
 * Look up the tracepoint field called 'name' in 'evsel' and set up 'field'
 * as a pointer reader for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	if (ff != NULL)
		return tp_field__init_ptr(field, ff);

	return -1;
}
240
/*
 * Initialize member 'name' of the struct syscall_tp stored in evsel->priv as
 * a pointer reader for the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name)			\
({									\
	struct syscall_tp *sc = evsel->priv;				\
	perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name);		\
})
244
245static void perf_evsel__delete_priv(struct perf_evsel *evsel)
246{
04662523 247 zfree(&evsel->priv);
77170988
ACM
248 perf_evsel__delete(evsel);
249}
250
d32855fa
ACM
251static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel)
252{
253 struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
254
255 if (evsel->priv != NULL) {
256 if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr"))
257 goto out_delete;
258 return 0;
259 }
260
261 return -ENOMEM;
262out_delete:
263 zfree(&evsel->priv);
264 return -ENOENT;
265}
266
d3d1c4bd
ACM
267static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel)
268{
269 struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
270
271 if (evsel->priv != NULL) { /* field, sizeof_field, offsetof_field */
272 if (__tp_field__init_uint(&sc->id, sizeof(long), sizeof(long long), evsel->needs_swap))
273 goto out_delete;
274
275 return 0;
276 }
277
278 return -ENOMEM;
279out_delete:
280 zfree(&evsel->priv);
281 return -EINVAL;
282}
283
284static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel)
285{
286 struct syscall_tp *sc = evsel->priv;
287
288 return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
289}
290
63f11c80 291static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler)
96695d44
NK
292{
293 evsel->priv = malloc(sizeof(struct syscall_tp));
294 if (evsel->priv != NULL) {
295 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
296 goto out_delete;
297
298 evsel->handler = handler;
299 return 0;
300 }
301
302 return -ENOMEM;
303
304out_delete:
04662523 305 zfree(&evsel->priv);
96695d44
NK
306 return -ENOENT;
307}
308
/*
 * Create the evsel for the "raw_syscalls:<direction>" tracepoint, falling
 * back to "syscalls:<direction>", and initialize it with 'handler'.
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the initialization failed (the evsel is deleted in that case).
 */
static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_raw_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
329
/*
 * Read member 'name' of the struct syscall_tp stored in evsel->priv from
 * 'sample', through that member's integer reader.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample)			\
({									\
	struct syscall_tp *fields = evsel->priv;			\
	fields->name.integer(&fields->name, sample);			\
})

/* Same as perf_evsel__sc_tp_uint(), but through the pointer reader. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample)			\
({									\
	struct syscall_tp *fields = evsel->priv;			\
	fields->name.pointer(&fields->name, sample);			\
})
337
0ae79636
ACM
338size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
339{
340 int idx = val - sa->offset;
1f115cb7 341
bc972ada 342 if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL)
0ae79636 343 return scnprintf(bf, size, intfmt, val);
1f115cb7 344
0ae79636 345 return scnprintf(bf, size, "%s", sa->entries[idx]);
03e3adc9
ACM
346}
347
975b7c2f
ACM
348static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
349 const char *intfmt,
350 struct syscall_arg *arg)
1f115cb7 351{
0ae79636 352 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
1f115cb7
ACM
353}
354
975b7c2f
ACM
/*
 * Syscall argument formatter backed by the strarray in arg->parm; values
 * without a name are printed as a decimal integer.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char intfmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, intfmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
362
83a51694
ACM
/* A set of strarrays, tried in order by syscall_arg__scnprintf_strarrays(). */
struct strarrays {
	int		nr_entries;
	struct strarray **entries;
};

/* Define 'strarrays__<array>' wrapping the 'struct strarray *' array 'array'. */
#define DEFINE_STRARRAYS(array)				\
	struct strarrays strarrays__##array = {		\
		.nr_entries = ARRAY_SIZE(array),	\
		.entries    = array,			\
	}
372
274e86fd
ACM
373size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
374 struct syscall_arg *arg)
83a51694
ACM
375{
376 struct strarrays *sas = arg->parm;
377 int i;
378
379 for (i = 0; i < sas->nr_entries; ++i) {
380 struct strarray *sa = sas->entries[i];
381 int idx = arg->val - sa->offset;
382
383 if (idx >= 0 && idx < sa->nr_entries) {
384 if (sa->entries[idx] == NULL)
385 break;
386 return scnprintf(bf, size, "%s", sa->entries[idx]);
387 }
388 }
389
390 return scnprintf(bf, size, "%d", arg->val);
391}
392
48e1f91a
ACM
393#ifndef AT_FDCWD
394#define AT_FDCWD -100
395#endif
396
75b757ca
ACM
397static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
398 struct syscall_arg *arg)
399{
400 int fd = arg->val;
401
402 if (fd == AT_FDCWD)
403 return scnprintf(bf, size, "CWD");
404
405 return syscall_arg__scnprintf_fd(bf, size, arg);
406}
407
408#define SCA_FDAT syscall_arg__scnprintf_fd_at
409
410static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
411 struct syscall_arg *arg);
412
413#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
414
2c2b1623 415size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
13d4ff3e 416{
01533e97 417 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
418}
419
2c2b1623 420size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
a1c2552d
ACM
421{
422 return scnprintf(bf, size, "%d", arg->val);
423}
424
5dde91ed
ACM
425size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
426{
427 return scnprintf(bf, size, "%ld", arg->val);
428}
429
729a7841
ACM
/* Names for the first argument of bpf(), indexed by command value. */
static const char *bpf_cmd[] = {
	[0] = "MAP_CREATE",
	[1] = "MAP_LOOKUP_ELEM",
	[2] = "MAP_UPDATE_ELEM",
	[3] = "MAP_DELETE_ELEM",
	[4] = "MAP_GET_NEXT_KEY",
	[5] = "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);
435
03e3adc9
ACM
436static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
437static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 438
1f115cb7
ACM
/* Interval timer names, used for the 'which' argument of {get,set}itimer(). */
static const char *itimers[] = {
	[0] = "REAL",
	[1] = "VIRTUAL",
	[2] = "PROF",
};
static DEFINE_STRARRAY(itimers);
441
b62bee1b
ACM
/* Names for the 'option' argument of keyctl(), indexed by option value. */
static const char *keyctl_options[] = {
	[0]  = "GET_KEYRING_ID",
	[1]  = "JOIN_SESSION_KEYRING",
	[2]  = "UPDATE",
	[3]  = "REVOKE",
	[4]  = "CHOWN",
	[5]  = "SETPERM",
	[6]  = "DESCRIBE",
	[7]  = "CLEAR",
	[8]  = "LINK",
	[9]  = "UNLINK",
	[10] = "SEARCH",
	[11] = "READ",
	[12] = "INSTANTIATE",
	[13] = "NEGATE",
	[14] = "SET_REQKEY_KEYRING",
	[15] = "SET_TIMEOUT",
	[16] = "ASSUME_AUTHORITY",
	[17] = "GET_SECURITY",
	[18] = "SESSION_TO_PARENT",
	[19] = "REJECT",
	[20] = "INSTANTIATE_IOV",
	[21] = "INVALIDATE",
	[22] = "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);
450
efe6b882
ACM
/*
 * lseek() 'whence' names. "DATA" and "HOLE" are only present when the
 * corresponding SEEK_* constant is known at build time.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
f9da0b0c 460
80f587d5
ACM
/* fcntl() command names for the values that start at zero. */
static const char *fcntl_cmds[] = {
	[0]  = "DUPFD",
	[1]  = "GETFD",
	[2]  = "SETFD",
	[3]  = "GETFL",
	[4]  = "SETFL",
	[5]  = "GETLK",
	[6]  = "SETLK",
	[7]  = "SETLKW",
	[8]  = "SETOWN",
	[9]  = "GETOWN",
	[10] = "SETSIG",
	[11] = "GETSIG",
	[12] = "GETLK64",
	[13] = "SETLK64",
	[14] = "SETLKW64",
	[15] = "SETOWN_EX",
	[16] = "GETOWN_EX",
	[17] = "GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
468
83a51694
ACM
469static const char *fcntl_linux_specific_cmds[] = {
470 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
471 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
64e4561d 472 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
83a51694
ACM
473};
474
475static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
476
477static struct strarray *fcntl_cmds_arrays[] = {
478 &strarray__fcntl_cmds,
479 &strarray__fcntl_linux_specific_cmds,
480};
481
482static DEFINE_STRARRAYS(fcntl_cmds_arrays);
483
c045bf02
ACM
/* Resource names for {get,set}rlimit() and prlimit64(), indexed by value. */
static const char *rlimit_resources[] = {
	[0]  = "CPU",
	[1]  = "FSIZE",
	[2]  = "DATA",
	[3]  = "STACK",
	[4]  = "CORE",
	[5]  = "RSS",
	[6]  = "NPROC",
	[7]  = "NOFILE",
	[8]  = "MEMLOCK",
	[9]  = "AS",
	[10] = "LOCKS",
	[11] = "SIGPENDING",
	[12] = "MSGQUEUE",
	[13] = "NICE",
	[14] = "RTPRIO",
	[15] = "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
490
eb5b1b14
ACM
/* Names for the 'how' argument of rt_sigprocmask(), indexed by value. */
static const char *sighow[] = {
	[0] = "BLOCK",
	[1] = "UNBLOCK",
	[2] = "SETMASK",
};
static DEFINE_STRARRAY(sighow);
493
4f8c1b74
DA
/* Clock id names, used for the first argument of clock_gettime(). */
static const char *clockid[] = {
	[0]  = "REALTIME",
	[1]  = "MONOTONIC",
	[2]  = "PROCESS_CPUTIME_ID",
	[3]  = "THREAD_CPUTIME_ID",
	[4]  = "MONOTONIC_RAW",
	[5]  = "REALTIME_COARSE",
	[6]  = "MONOTONIC_COARSE",
	[7]  = "BOOTTIME",
	[8]  = "REALTIME_ALARM",
	[9]  = "BOOTTIME_ALARM",
	[10] = "SGI_CYCLE",
	[11] = "TAI"
};
static DEFINE_STRARRAY(clockid);
500
e10bce81
ACM
/*
 * Address family names for socket()/socketpair(), indexed by the AF_* value
 * from linux/socket.h.
 *
 * Explicit indexes are used so that a forgotten family cannot silently shift
 * every later name by one: "MPLS" (AF_MPLS == 28) used to be missing, which
 * made AF_CAN print as "TIPC", AF_TIPC as "BLUETOOTH", and so on.
 */
static const char *socket_families[] = {
	[0]  = "UNSPEC",
	[1]  = "LOCAL",
	[2]  = "INET",
	[3]  = "AX25",
	[4]  = "IPX",
	[5]  = "APPLETALK",
	[6]  = "NETROM",
	[7]  = "BRIDGE",
	[8]  = "ATMPVC",
	[9]  = "X25",
	[10] = "INET6",
	[11] = "ROSE",
	[12] = "DECnet",
	[13] = "NETBEUI",
	[14] = "SECURITY",
	[15] = "KEY",
	[16] = "NETLINK",
	[17] = "PACKET",
	[18] = "ASH",
	[19] = "ECONET",
	[20] = "ATMSVC",
	[21] = "RDS",
	[22] = "SNA",
	[23] = "IRDA",
	[24] = "PPPOX",
	[25] = "WANPIPE",
	[26] = "LLC",
	[27] = "IB",
	[28] = "MPLS",
	[29] = "CAN",
	[30] = "TIPC",
	[31] = "BLUETOOTH",
	[32] = "IUCV",
	[33] = "RXRPC",
	[34] = "ISDN",
	[35] = "PHONET",
	[36] = "IEEE802154",
	[37] = "CAIF",
	[38] = "ALG",
	[39] = "NFC",
	[40] = "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
510
51108999
ACM
511static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
512 struct syscall_arg *arg)
513{
514 size_t printed = 0;
515 int mode = arg->val;
516
517 if (mode == F_OK) /* 0 */
518 return scnprintf(bf, size, "F");
519#define P_MODE(n) \
520 if (mode & n##_OK) { \
521 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
522 mode &= ~n##_OK; \
523 }
524
525 P_MODE(R);
526 P_MODE(W);
527 P_MODE(X);
528#undef P_MODE
529
530 if (mode)
531 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
532
533 return printed;
534}
535
536#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
537
f994592d
ACM
538static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
539 struct syscall_arg *arg);
540
541#define SCA_FILENAME syscall_arg__scnprintf_filename
542
46cce19b
ACM
543static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
544 struct syscall_arg *arg)
545{
546 int printed = 0, flags = arg->val;
547
548#define P_FLAG(n) \
549 if (flags & O_##n) { \
550 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
551 flags &= ~O_##n; \
552 }
553
554 P_FLAG(CLOEXEC);
555 P_FLAG(NONBLOCK);
556#undef P_FLAG
557
558 if (flags)
559 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
560
561 return printed;
562}
563
564#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
565
a355a61e
ACM
566#ifndef GRND_NONBLOCK
567#define GRND_NONBLOCK 0x0001
568#endif
569#ifndef GRND_RANDOM
570#define GRND_RANDOM 0x0002
571#endif
572
39878d49
ACM
573static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
574 struct syscall_arg *arg)
575{
576 int printed = 0, flags = arg->val;
577
578#define P_FLAG(n) \
579 if (flags & GRND_##n) { \
580 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
581 flags &= ~GRND_##n; \
582 }
583
584 P_FLAG(RANDOM);
585 P_FLAG(NONBLOCK);
586#undef P_FLAG
587
588 if (flags)
589 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
590
591 return printed;
592}
593
594#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
595
82d4a110
ACM
/*
 * Initializer for a syscall_arg_fmt that prints the argument through
 * 'strarray__<array>'. The 'name' parameter is not used by the expansion; it
 * only labels the argument at the use site.
 */
#define STRARRAY(name, array)			\
	{					\
		.scnprintf = SCA_STRARRAY,	\
		.parm	   = &strarray__##array,	\
	}
453350dd 599
092bd3cd 600#include "trace/beauty/arch_errno_names.c"
ea8dc3ce 601#include "trace/beauty/eventfd.c"
d5d71e86 602#include "trace/beauty/futex_op.c"
3258abe0 603#include "trace/beauty/futex_val3.c"
df4cb167 604#include "trace/beauty/mmap.c"
ba2f22cf 605#include "trace/beauty/mode_t.c"
a30e6259 606#include "trace/beauty/msg_flags.c"
8f48df69 607#include "trace/beauty/open_flags.c"
62de344e 608#include "trace/beauty/perf_event_open.c"
d5d71e86 609#include "trace/beauty/pid.c"
a3bca91f 610#include "trace/beauty/sched_policy.c"
f5cd95ea 611#include "trace/beauty/seccomp.c"
12199d8e 612#include "trace/beauty/signum.c"
bbf86c43 613#include "trace/beauty/socket_type.c"
7206b900 614#include "trace/beauty/waitid_options.c"
a3bca91f 615
82d4a110
ACM
/*
 * How to print one syscall argument: the formatter, an opaque parameter for
 * it, an optional argument name and whether a zero value should be shown.
 */
struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void	   *parm;
	const char *name;
	bool	   show_zero;
};
622
514f1c67
ACM
623static struct syscall_fmt {
624 const char *name;
aec1930b 625 const char *alias;
82d4a110 626 struct syscall_arg_fmt arg[6];
332337da 627 u8 nr_args;
11c8e39f 628 bool errpid;
514f1c67 629 bool timeout;
04b34729 630 bool hexret;
514f1c67 631} syscall_fmts[] = {
1f63139c 632 { .name = "access",
82d4a110 633 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
1f63139c 634 { .name = "bpf",
82d4a110 635 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
beccb2b5 636 { .name = "brk", .hexret = true,
82d4a110 637 .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
1f63139c 638 { .name = "clock_gettime",
82d4a110 639 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
33396a3a
ACM
640 { .name = "clone", .errpid = true, .nr_args = 5,
641 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
642 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
643 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
644 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, },
645 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
1f63139c 646 { .name = "close",
82d4a110 647 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
1f63139c 648 { .name = "epoll_ctl",
82d4a110 649 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
1f63139c 650 { .name = "eventfd2",
82d4a110 651 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
1f63139c 652 { .name = "fchmodat",
82d4a110 653 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 654 { .name = "fchownat",
82d4a110 655 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 656 { .name = "fcntl",
82d4a110 657 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
39cc355b
ACM
658 .parm = &strarrays__fcntl_cmds_arrays,
659 .show_zero = true, },
82d4a110 660 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
1f63139c 661 { .name = "flock",
82d4a110 662 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
1f63139c
ACM
663 { .name = "fstat", .alias = "newfstat", },
664 { .name = "fstatat", .alias = "newfstatat", },
665 { .name = "futex",
3258abe0
ACM
666 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ },
667 [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, },
1f63139c 668 { .name = "futimesat",
82d4a110 669 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 670 { .name = "getitimer",
82d4a110 671 .arg = { [0] = STRARRAY(which, itimers), }, },
c65f1070 672 { .name = "getpid", .errpid = true, },
d1d438a3 673 { .name = "getpgid", .errpid = true, },
c65f1070 674 { .name = "getppid", .errpid = true, },
1f63139c 675 { .name = "getrandom",
82d4a110 676 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
1f63139c 677 { .name = "getrlimit",
82d4a110 678 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
2d1073de 679 { .name = "gettid", .errpid = true, },
1f63139c 680 { .name = "ioctl",
82d4a110 681 .arg = {
844ae5b4
ACM
682#if defined(__i386__) || defined(__x86_64__)
683/*
684 * FIXME: Make this available to all arches.
685 */
1cc47f2d 686 [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
82d4a110 687 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 688#else
82d4a110 689 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 690#endif
1de3038d
ACM
691 { .name = "kcmp", .nr_args = 5,
692 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, },
693 [1] = { .name = "pid2", .scnprintf = SCA_PID, },
694 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, },
695 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, },
696 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, },
1f63139c 697 { .name = "keyctl",
82d4a110 698 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
1f63139c 699 { .name = "kill",
82d4a110 700 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 701 { .name = "linkat",
82d4a110 702 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 703 { .name = "lseek",
82d4a110 704 .arg = { [2] = STRARRAY(whence, whences), }, },
1f63139c
ACM
705 { .name = "lstat", .alias = "newlstat", },
706 { .name = "madvise",
82d4a110
ACM
707 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
708 [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
1f63139c 709 { .name = "mkdirat",
82d4a110 710 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 711 { .name = "mknodat",
82d4a110 712 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 713 { .name = "mlock",
82d4a110 714 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 715 { .name = "mlockall",
82d4a110 716 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
beccb2b5 717 { .name = "mmap", .hexret = true,
54265664
JO
718/* The standard mmap maps to old_mmap on s390x */
719#if defined(__s390x__)
720 .alias = "old_mmap",
721#endif
82d4a110
ACM
722 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
723 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
724 [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
1f63139c 725 { .name = "mprotect",
82d4a110
ACM
726 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
727 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
1f63139c 728 { .name = "mq_unlink",
82d4a110 729 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
ae685380 730 { .name = "mremap", .hexret = true,
82d4a110
ACM
731 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
732 [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
733 [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
1f63139c 734 { .name = "munlock",
82d4a110 735 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 736 { .name = "munmap",
82d4a110 737 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 738 { .name = "name_to_handle_at",
82d4a110 739 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 740 { .name = "newfstatat",
82d4a110 741 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 742 { .name = "open",
82d4a110 743 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 744 { .name = "open_by_handle_at",
82d4a110
ACM
745 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
746 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 747 { .name = "openat",
82d4a110
ACM
748 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
749 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 750 { .name = "perf_event_open",
82d4a110
ACM
751 .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ },
752 [3] = { .scnprintf = SCA_FD, /* group_fd */ },
753 [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
1f63139c 754 { .name = "pipe2",
82d4a110 755 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
83bc9c37
ACM
756 { .name = "pkey_alloc",
757 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, },
758 { .name = "pkey_free",
759 .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, },
760 { .name = "pkey_mprotect",
761 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
762 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
763 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, },
1f63139c
ACM
764 { .name = "poll", .timeout = true, },
765 { .name = "ppoll", .timeout = true, },
d688d037
ACM
766 { .name = "prctl", .alias = "arch_prctl",
767 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
768 [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
769 [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
1f63139c
ACM
770 { .name = "pread", .alias = "pread64", },
771 { .name = "preadv", .alias = "pread", },
772 { .name = "prlimit64",
82d4a110 773 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
1f63139c
ACM
774 { .name = "pwrite", .alias = "pwrite64", },
775 { .name = "readlinkat",
82d4a110 776 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 777 { .name = "recvfrom",
82d4a110 778 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 779 { .name = "recvmmsg",
82d4a110 780 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 781 { .name = "recvmsg",
82d4a110 782 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 783 { .name = "renameat",
82d4a110 784 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 785 { .name = "rt_sigaction",
82d4a110 786 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 787 { .name = "rt_sigprocmask",
82d4a110 788 .arg = { [0] = STRARRAY(how, sighow), }, },
1f63139c 789 { .name = "rt_sigqueueinfo",
82d4a110 790 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 791 { .name = "rt_tgsigqueueinfo",
82d4a110 792 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 793 { .name = "sched_setscheduler",
82d4a110 794 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
1f63139c 795 { .name = "seccomp",
82d4a110
ACM
796 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
797 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
1f63139c
ACM
798 { .name = "select", .timeout = true, },
799 { .name = "sendmmsg",
82d4a110 800 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 801 { .name = "sendmsg",
82d4a110 802 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 803 { .name = "sendto",
82d4a110 804 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
c65f1070 805 { .name = "set_tid_address", .errpid = true, },
1f63139c 806 { .name = "setitimer",
82d4a110 807 .arg = { [0] = STRARRAY(which, itimers), }, },
1f63139c 808 { .name = "setrlimit",
82d4a110 809 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
1f63139c 810 { .name = "socket",
82d4a110 811 .arg = { [0] = STRARRAY(family, socket_families),
162d3edb
ACM
812 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
813 [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
1f63139c 814 { .name = "socketpair",
82d4a110 815 .arg = { [0] = STRARRAY(family, socket_families),
162d3edb
ACM
816 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
817 [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
1f63139c
ACM
818 { .name = "stat", .alias = "newstat", },
819 { .name = "statx",
82d4a110
ACM
820 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ },
821 [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
822 [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
1f63139c 823 { .name = "swapoff",
82d4a110 824 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 825 { .name = "swapon",
82d4a110 826 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 827 { .name = "symlinkat",
82d4a110 828 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 829 { .name = "tgkill",
82d4a110 830 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 831 { .name = "tkill",
82d4a110 832 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c
ACM
833 { .name = "uname", .alias = "newuname", },
834 { .name = "unlinkat",
82d4a110 835 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 836 { .name = "utimensat",
82d4a110 837 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
11c8e39f 838 { .name = "wait4", .errpid = true,
82d4a110 839 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
11c8e39f 840 { .name = "waitid", .errpid = true,
82d4a110 841 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
514f1c67
ACM
842};
843
844static int syscall_fmt__cmp(const void *name, const void *fmtp)
845{
846 const struct syscall_fmt *fmt = fmtp;
847 return strcmp(name, fmt->name);
848}
849
850static struct syscall_fmt *syscall_fmt__find(const char *name)
851{
852 const int nmemb = ARRAY_SIZE(syscall_fmts);
853 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
854}
855
6a648b53
ACM
/*
 * Description of one syscall.
 *
 * is_exit:   is this "exit" or "exit_group"?
 * is_open:   is this "open" or "openat"? To associate the fd returned in
 *	      sys_exit with the pathname in sys_enter.
 * args_size: sum of the sizes of the syscall arguments, anything after that
 *	      is augmented stuff: pathname for openat, etc.
 */
struct syscall {
	struct event_format	*tp_format;
	int			nr_args;
	int			args_size;
	bool			is_exit;
	bool			is_open;
	struct format_field	*args;
	const char		*name;
	struct syscall_fmt	*fmt;
	struct syscall_arg_fmt	*arg_fmt;
};
872
fd2b2975
ACM
873/*
874 * We need to have this 'calculated' boolean because in some cases we really
875 * don't know what is the duration of a syscall, for instance, when we start
876 * a session and some threads are waiting for a syscall to finish, say 'poll',
877 * in which case all we can do is to print "( ? ) for duration and for the
878 * start timestamp.
879 */
880static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
881{
882 double duration = (double)t / NSEC_PER_MSEC;
883 size_t printed = fprintf(fp, "(");
884
fd2b2975 885 if (!calculated)
522283fe 886 printed += fprintf(fp, " ");
fd2b2975 887 else if (duration >= 1.0)
60c907ab
ACM
888 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
889 else if (duration >= 0.01)
890 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
891 else
892 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 893 return printed + fprintf(fp, "): ");
60c907ab
ACM
894}
895
f994592d
ACM
896/**
897 * filename.ptr: The filename char pointer that will be vfs_getname'd
898 * filename.entry_str_pos: Where to insert the string translated from
899 * filename.ptr by the vfs_getname tracepoint/kprobe.
84486caa
ACM
900 * ret_scnprintf: syscall args may set this to a different syscall return
901 * formatter, for instance, fcntl may return fds, file flags, etc.
f994592d 902 */
752fde44
ACM
903struct thread_trace {
904 u64 entry_time;
752fde44 905 bool entry_pending;
efd5745e 906 unsigned long nr_events;
a2ea67d7 907 unsigned long pfmaj, pfmin;
752fde44 908 char *entry_str;
1302d88e 909 double runtime_ms;
7ee57434 910 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
f994592d
ACM
911 struct {
912 unsigned long ptr;
7f4f8001
ACM
913 short int entry_str_pos;
914 bool pending_open;
915 unsigned int namelen;
916 char *name;
f994592d 917 } filename;
75b757ca
ACM
918 struct {
919 int max;
920 char **table;
921 } paths;
bf2575c1
DA
922
923 struct intlist *syscall_stats;
752fde44
ACM
924};
925
926static struct thread_trace *thread_trace__new(void)
927{
75b757ca
ACM
928 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
929
930 if (ttrace)
931 ttrace->paths.max = -1;
932
bf2575c1
DA
933 ttrace->syscall_stats = intlist__new(NULL);
934
75b757ca 935 return ttrace;
752fde44
ACM
936}
937
c24ff998 938static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 939{
efd5745e
ACM
940 struct thread_trace *ttrace;
941
752fde44
ACM
942 if (thread == NULL)
943 goto fail;
944
89dceb22
NK
945 if (thread__priv(thread) == NULL)
946 thread__set_priv(thread, thread_trace__new());
48000a1a 947
89dceb22 948 if (thread__priv(thread) == NULL)
752fde44
ACM
949 goto fail;
950
89dceb22 951 ttrace = thread__priv(thread);
efd5745e
ACM
952 ++ttrace->nr_events;
953
954 return ttrace;
752fde44 955fail:
c24ff998 956 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
957 "WARNING: not enough memory, dropping samples!\n");
958 return NULL;
959}
960
84486caa
ACM
961
962void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
7ee57434 963 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
84486caa
ACM
964{
965 struct thread_trace *ttrace = thread__priv(arg->thread);
966
967 ttrace->ret_scnprintf = ret_scnprintf;
968}
969
598d02c5
SF
/*
 * Bit flags; presumably select tracing of major and/or minor page faults,
 * the users are not visible from here - confirm.
 */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size in bytes of the per thread buffer where syscall entries are formatted */
static const size_t trace__entry_str_size = 2048;
974
97119f37 975static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 976{
89dceb22 977 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
978
979 if (fd > ttrace->paths.max) {
980 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
981
982 if (npath == NULL)
983 return -1;
984
985 if (ttrace->paths.max != -1) {
986 memset(npath + ttrace->paths.max + 1, 0,
987 (fd - ttrace->paths.max) * sizeof(char *));
988 } else {
989 memset(npath, 0, (fd + 1) * sizeof(char *));
990 }
991
992 ttrace->paths.table = npath;
993 ttrace->paths.max = fd;
994 }
995
996 ttrace->paths.table[fd] = strdup(pathname);
997
998 return ttrace->paths.table[fd] != NULL ? 0 : -1;
999}
1000
97119f37
ACM
1001static int thread__read_fd_path(struct thread *thread, int fd)
1002{
1003 char linkname[PATH_MAX], pathname[PATH_MAX];
1004 struct stat st;
1005 int ret;
1006
1007 if (thread->pid_ == thread->tid) {
1008 scnprintf(linkname, sizeof(linkname),
1009 "/proc/%d/fd/%d", thread->pid_, fd);
1010 } else {
1011 scnprintf(linkname, sizeof(linkname),
1012 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1013 }
1014
1015 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1016 return -1;
1017
1018 ret = readlink(linkname, pathname, sizeof(pathname));
1019
1020 if (ret < 0 || ret > st.st_size)
1021 return -1;
1022
1023 pathname[ret] = '\0';
1024 return trace__set_fd_pathname(thread, fd, pathname);
1025}
1026
c522739d
ACM
1027static const char *thread__fd_path(struct thread *thread, int fd,
1028 struct trace *trace)
75b757ca 1029{
89dceb22 1030 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
1031
1032 if (ttrace == NULL)
1033 return NULL;
1034
1035 if (fd < 0)
1036 return NULL;
1037
cdcd1e6b 1038 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
1039 if (!trace->live)
1040 return NULL;
1041 ++trace->stats.proc_getname;
cdcd1e6b 1042 if (thread__read_fd_path(thread, fd))
c522739d
ACM
1043 return NULL;
1044 }
75b757ca
ACM
1045
1046 return ttrace->paths.table[fd];
1047}
1048
fc65eb82 1049size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
75b757ca
ACM
1050{
1051 int fd = arg->val;
1052 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 1053 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
1054
1055 if (path)
1056 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1057
1058 return printed;
1059}
1060
0a2f7540
ACM
1061size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
1062{
1063 size_t printed = scnprintf(bf, size, "%d", fd);
1064 struct thread *thread = machine__find_thread(trace->host, pid, pid);
1065
1066 if (thread) {
1067 const char *path = thread__fd_path(thread, fd, trace);
1068
1069 if (path)
1070 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1071
1072 thread__put(thread);
1073 }
1074
1075 return printed;
1076}
1077
75b757ca
ACM
1078static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1079 struct syscall_arg *arg)
1080{
1081 int fd = arg->val;
1082 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1083 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1084
04662523
ACM
1085 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1086 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1087
1088 return printed;
1089}
1090
f994592d
ACM
1091static void thread__set_filename_pos(struct thread *thread, const char *bf,
1092 unsigned long ptr)
1093{
1094 struct thread_trace *ttrace = thread__priv(thread);
1095
1096 ttrace->filename.ptr = ptr;
1097 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1098}
1099
75d1e306
ACM
1100static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
1101{
1102 struct augmented_arg *augmented_arg = arg->augmented.args;
1103
1104 return scnprintf(bf, size, "%.*s", augmented_arg->size, augmented_arg->value);
1105}
1106
f994592d
ACM
1107static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1108 struct syscall_arg *arg)
1109{
1110 unsigned long ptr = arg->val;
1111
75d1e306
ACM
1112 if (arg->augmented.args)
1113 return syscall_arg__scnprintf_augmented_string(arg, bf, size);
1114
f994592d
ACM
1115 if (!arg->trace->vfs_getname)
1116 return scnprintf(bf, size, "%#x", ptr);
1117
1118 thread__set_filename_pos(arg->thread, bf, ptr);
1119 return 0;
1120}
1121
ae9ed035
ACM
1122static bool trace__filter_duration(struct trace *trace, double t)
1123{
1124 return t < (trace->duration_filter * NSEC_PER_MSEC);
1125}
1126
fd2b2975 1127static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1128{
1129 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1130
60c907ab 1131 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1132}
1133
fd2b2975
ACM
1134/*
1135 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1136 * using ttrace->entry_time for a thread that receives a sys_exit without
1137 * first having received a sys_enter ("poll" issued before tracing session
1138 * starts, lost sys_enter exit due to ring buffer overflow).
1139 */
1140static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1141{
1142 if (tstamp > 0)
1143 return __trace__fprintf_tstamp(trace, tstamp, fp);
1144
1145 return fprintf(fp, " ? ");
1146}
1147
/*
 * Flags set from the signal handler and polled by the rest of the tool.
 *
 * Objects written from a signal handler and read elsewhere must be
 * 'volatile sig_atomic_t'; with a plain bool the compiler is free to cache
 * the value in a register and never notice the update.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Ask the main loop to finish; 'interrupted' records whether the request
 * came from SIGINT as opposed to any other signal this handler is set for.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1156
6dcbd212 1157static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
752fde44 1158{
6dcbd212 1159 size_t printed = 0;
752fde44 1160
50c95cbd
ACM
1161 if (trace->multiple_threads) {
1162 if (trace->show_comm)
1902efe7 1163 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1164 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1165 }
752fde44
ACM
1166
1167 return printed;
1168}
1169
6dcbd212
ACM
1170static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1171 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1172{
1173 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1174 printed += fprintf_duration(duration, duration_calculated, fp);
1175 return printed + trace__fprintf_comm_tid(trace, thread, fp);
1176}
1177
c24ff998 1178static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1179 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1180{
1181 int ret = 0;
1182
1183 switch (event->header.type) {
1184 case PERF_RECORD_LOST:
c24ff998 1185 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1186 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1187 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1188 break;
752fde44 1189 default:
162f0bef 1190 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1191 break;
1192 }
1193
1194 return ret;
1195}
1196
c24ff998 1197static int trace__tool_process(struct perf_tool *tool,
752fde44 1198 union perf_event *event,
162f0bef 1199 struct perf_sample *sample,
752fde44
ACM
1200 struct machine *machine)
1201{
c24ff998 1202 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1203 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1204}
1205
caf8a0d0
ACM
1206static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1207{
1208 struct machine *machine = vmachine;
1209
1210 if (machine->kptr_restrict_warned)
1211 return NULL;
1212
1213 if (symbol_conf.kptr_restrict) {
1214 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1215 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1216 "Kernel samples will not be resolved.\n");
1217 machine->kptr_restrict_warned = true;
1218 return NULL;
1219 }
1220
1221 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1222}
1223
752fde44
ACM
1224static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1225{
0a7e6d1b 1226 int err = symbol__init(NULL);
752fde44
ACM
1227
1228 if (err)
1229 return err;
1230
8fb598e5
DA
1231 trace->host = machine__new_host();
1232 if (trace->host == NULL)
1233 return -ENOMEM;
752fde44 1234
cbd5c178
AV
1235 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1236 if (err < 0)
1237 goto out;
706c3da4 1238
a33fbd56 1239 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76 1240 evlist->threads, trace__tool_process, false,
340b47f5 1241 trace->opts.proc_map_timeout, 1);
cbd5c178 1242out:
752fde44
ACM
1243 if (err)
1244 symbol__exit();
1245
1246 return err;
1247}
1248
33974a41
AV
1249static void trace__symbols__exit(struct trace *trace)
1250{
1251 machine__exit(trace->host);
1252 trace->host = NULL;
1253
1254 symbol__exit();
1255}
1256
5e58fcfa 1257static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
13d4ff3e 1258{
5e58fcfa 1259 int idx;
13d4ff3e 1260
332337da
ACM
1261 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1262 nr_args = sc->fmt->nr_args;
1263
5e58fcfa 1264 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
82d4a110 1265 if (sc->arg_fmt == NULL)
13d4ff3e
ACM
1266 return -1;
1267
5e58fcfa
ACM
1268 for (idx = 0; idx < nr_args; ++idx) {
1269 if (sc->fmt)
82d4a110 1270 sc->arg_fmt[idx] = sc->fmt->arg[idx];
5e58fcfa 1271 }
82d4a110 1272
5e58fcfa
ACM
1273 sc->nr_args = nr_args;
1274 return 0;
1275}
1276
1277static int syscall__set_arg_fmts(struct syscall *sc)
1278{
7a983a0f 1279 struct format_field *field, *last_field = NULL;
5e58fcfa
ACM
1280 int idx = 0, len;
1281
1282 for (field = sc->args; field; field = field->next, ++idx) {
7a983a0f
ACM
1283 last_field = field;
1284
5e58fcfa
ACM
1285 if (sc->fmt && sc->fmt->arg[idx].scnprintf)
1286 continue;
1f115cb7 1287
82d4a110 1288 if (strcmp(field->type, "const char *") == 0 &&
12f3ca4f
ACM
1289 (strcmp(field->name, "filename") == 0 ||
1290 strcmp(field->name, "path") == 0 ||
1291 strcmp(field->name, "pathname") == 0))
82d4a110 1292 sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
beccb2b5 1293 else if (field->flags & FIELD_IS_POINTER)
82d4a110 1294 sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
d1d438a3 1295 else if (strcmp(field->type, "pid_t") == 0)
82d4a110 1296 sc->arg_fmt[idx].scnprintf = SCA_PID;
ba2f22cf 1297 else if (strcmp(field->type, "umode_t") == 0)
82d4a110 1298 sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
b6565c90
ACM
1299 else if ((strcmp(field->type, "int") == 0 ||
1300 strcmp(field->type, "unsigned int") == 0 ||
1301 strcmp(field->type, "long") == 0) &&
1302 (len = strlen(field->name)) >= 2 &&
1303 strcmp(field->name + len - 2, "fd") == 0) {
1304 /*
1305 * /sys/kernel/tracing/events/syscalls/sys_enter*
1306 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1307 * 65 int
1308 * 23 unsigned int
1309 * 7 unsigned long
1310 */
82d4a110 1311 sc->arg_fmt[idx].scnprintf = SCA_FD;
b6565c90 1312 }
13d4ff3e
ACM
1313 }
1314
7a983a0f
ACM
1315 if (last_field)
1316 sc->args_size = last_field->offset + last_field->size;
1317
13d4ff3e
ACM
1318 return 0;
1319}
1320
514f1c67
ACM
1321static int trace__read_syscall_info(struct trace *trace, int id)
1322{
1323 char tp_name[128];
1324 struct syscall *sc;
fd0db102 1325 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1326
1327 if (name == NULL)
1328 return -1;
514f1c67
ACM
1329
1330 if (id > trace->syscalls.max) {
1331 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1332
1333 if (nsyscalls == NULL)
1334 return -1;
1335
1336 if (trace->syscalls.max != -1) {
1337 memset(nsyscalls + trace->syscalls.max + 1, 0,
1338 (id - trace->syscalls.max) * sizeof(*sc));
1339 } else {
1340 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1341 }
1342
1343 trace->syscalls.table = nsyscalls;
1344 trace->syscalls.max = id;
1345 }
1346
1347 sc = trace->syscalls.table + id;
3a531260 1348 sc->name = name;
2ae3a312 1349
3a531260 1350 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1351
aec1930b 1352 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1353 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1354
8dd2a131 1355 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1356 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1357 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1358 }
514f1c67 1359
5e58fcfa
ACM
1360 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1361 return -1;
1362
8dd2a131 1363 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1364 return -1;
1365
f208bd8d 1366 sc->args = sc->tp_format->format.fields;
c42de706
TS
1367 /*
1368 * We need to check and discard the first variable '__syscall_nr'
1369 * or 'nr' that mean the syscall number. It is needless here.
1370 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1371 */
1372 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1373 sc->args = sc->args->next;
1374 --sc->nr_args;
1375 }
1376
5089f20e 1377 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
6a648b53 1378 sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
5089f20e 1379
13d4ff3e 1380 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1381}
1382
d0cc439b
ACM
1383static int trace__validate_ev_qualifier(struct trace *trace)
1384{
8b3ce757 1385 int err = 0, i;
27702bcf 1386 size_t nr_allocated;
d0cc439b
ACM
1387 struct str_node *pos;
1388
8b3ce757
ACM
1389 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1390 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1391 sizeof(trace->ev_qualifier_ids.entries[0]));
1392
1393 if (trace->ev_qualifier_ids.entries == NULL) {
1394 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1395 trace->output);
1396 err = -EINVAL;
1397 goto out;
1398 }
1399
27702bcf 1400 nr_allocated = trace->ev_qualifier_ids.nr;
8b3ce757
ACM
1401 i = 0;
1402
602a1f4d 1403 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1404 const char *sc = pos->s;
27702bcf 1405 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
d0cc439b 1406
8b3ce757 1407 if (id < 0) {
27702bcf
ACM
1408 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1409 if (id >= 0)
1410 goto matches;
1411
d0cc439b
ACM
1412 if (err == 0) {
1413 fputs("Error:\tInvalid syscall ", trace->output);
1414 err = -EINVAL;
1415 } else {
1416 fputs(", ", trace->output);
1417 }
1418
1419 fputs(sc, trace->output);
1420 }
27702bcf 1421matches:
8b3ce757 1422 trace->ev_qualifier_ids.entries[i++] = id;
27702bcf
ACM
1423 if (match_next == -1)
1424 continue;
1425
1426 while (1) {
1427 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1428 if (id < 0)
1429 break;
1430 if (nr_allocated == trace->ev_qualifier_ids.nr) {
1431 void *entries;
1432
1433 nr_allocated += 8;
1434 entries = realloc(trace->ev_qualifier_ids.entries,
1435 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1436 if (entries == NULL) {
1437 err = -ENOMEM;
1438 fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1439 goto out_free;
1440 }
1441 trace->ev_qualifier_ids.entries = entries;
1442 }
1443 trace->ev_qualifier_ids.nr++;
1444 trace->ev_qualifier_ids.entries[i++] = id;
1445 }
d0cc439b
ACM
1446 }
1447
1448 if (err < 0) {
1449 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1450 "\nHint:\tand: 'man syscalls'\n", trace->output);
27702bcf 1451out_free:
8b3ce757
ACM
1452 zfree(&trace->ev_qualifier_ids.entries);
1453 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1454 }
8b3ce757 1455out:
d0cc439b
ACM
1456 return err;
1457}
1458
55d43bca
DA
1459/*
1460 * args is to be interpreted as a series of longs but we need to handle
1461 * 8-byte unaligned accesses. args points to raw_data within the event
1462 * and raw_data is guaranteed to be 8-byte unaligned because it is
1463 * preceded by raw_size which is a u32. So we need to copy args to a temp
1464 * variable to read it. Most notably this avoids extended load instructions
1465 * on unaligned addresses
1466 */
325f5091 1467unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
f9f83b33
ACM
1468{
1469 unsigned long val;
325f5091 1470 unsigned char *p = arg->args + sizeof(unsigned long) * idx;
f9f83b33
ACM
1471
1472 memcpy(&val, p, sizeof(val));
1473 return val;
1474}
1475
c51bdfec
ACM
1476static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1477 struct syscall_arg *arg)
1478{
1479 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1480 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1481
1482 return scnprintf(bf, size, "arg%d: ", arg->idx);
1483}
1484
d032d79e
ACM
1485static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
1486 struct syscall_arg *arg, unsigned long val)
1487{
1488 if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
1489 arg->val = val;
1490 if (sc->arg_fmt[arg->idx].parm)
1491 arg->parm = sc->arg_fmt[arg->idx].parm;
1492 return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
1493 }
1494 return scnprintf(bf, size, "%ld", val);
1495}
1496
752fde44 1497static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
7a983a0f
ACM
1498 unsigned char *args, void *augmented_args, int augmented_args_size,
1499 struct trace *trace, struct thread *thread)
514f1c67 1500{
514f1c67 1501 size_t printed = 0;
55d43bca 1502 unsigned long val;
d032d79e
ACM
1503 u8 bit = 1;
1504 struct syscall_arg arg = {
1505 .args = args,
7a983a0f
ACM
1506 .augmented = {
1507 .size = augmented_args_size,
1508 .args = augmented_args,
1509 },
d032d79e
ACM
1510 .idx = 0,
1511 .mask = 0,
1512 .trace = trace,
1513 .thread = thread,
1514 };
84486caa
ACM
1515 struct thread_trace *ttrace = thread__priv(thread);
1516
1517 /*
1518 * Things like fcntl will set this in its 'cmd' formatter to pick the
1519 * right formatter for the return value (an fd? file flags?), which is
1520 * not needed for syscalls that always return a given type, say an fd.
1521 */
1522 ttrace->ret_scnprintf = NULL;
514f1c67 1523
f208bd8d 1524 if (sc->args != NULL) {
514f1c67 1525 struct format_field *field;
6e7eeb51 1526
f208bd8d 1527 for (field = sc->args; field;
01533e97
ACM
1528 field = field->next, ++arg.idx, bit <<= 1) {
1529 if (arg.mask & bit)
6e7eeb51 1530 continue;
55d43bca 1531
f9f83b33 1532 val = syscall_arg__val(&arg, arg.idx);
55d43bca 1533
4aa58232
ACM
1534 /*
1535 * Suppress this argument if its value is zero and
1536 * and we don't have a string associated in an
1537 * strarray for it.
1538 */
55d43bca 1539 if (val == 0 &&
82d4a110 1540 !(sc->arg_fmt &&
d47737d5
ACM
1541 (sc->arg_fmt[arg.idx].show_zero ||
1542 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
82d4a110
ACM
1543 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
1544 sc->arg_fmt[arg.idx].parm))
22ae5cf1
ACM
1545 continue;
1546
752fde44 1547 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1548 "%s%s: ", printed ? ", " : "", field->name);
d032d79e 1549 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
514f1c67 1550 }
4c4d6e51
ACM
1551 } else if (IS_ERR(sc->tp_format)) {
1552 /*
1553 * If we managed to read the tracepoint /format file, then we
1554 * may end up not having any args, like with gettid(), so only
1555 * print the raw args when we didn't manage to read it.
1556 */
332337da 1557 while (arg.idx < sc->nr_args) {
d032d79e
ACM
1558 if (arg.mask & bit)
1559 goto next_arg;
1560 val = syscall_arg__val(&arg, arg.idx);
c51bdfec
ACM
1561 if (printed)
1562 printed += scnprintf(bf + printed, size - printed, ", ");
1563 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
d032d79e
ACM
1564 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
1565next_arg:
1566 ++arg.idx;
1567 bit <<= 1;
514f1c67
ACM
1568 }
1569 }
1570
1571 return printed;
1572}
1573
/*
 * Signature of the per tracepoint sample handlers, such as trace__sys_enter():
 * they receive the tool state, the evsel the sample came from, the raw event
 * and the parsed sample.
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1577
1578static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1579 struct perf_evsel *evsel, int id)
ba3d7dee 1580{
ba3d7dee
ACM
1581
1582 if (id < 0) {
adaa18bf
ACM
1583
1584 /*
1585 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1586 * before that, leaving at a higher verbosity level till that is
1587 * explained. Reproduced with plain ftrace with:
1588 *
1589 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1590 * grep "NR -1 " /t/trace_pipe
1591 *
1592 * After generating some load on the machine.
1593 */
1594 if (verbose > 1) {
1595 static u64 n;
1596 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1597 id, perf_evsel__name(evsel), ++n);
1598 }
ba3d7dee
ACM
1599 return NULL;
1600 }
1601
1602 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1603 trace__read_syscall_info(trace, id))
1604 goto out_cant_read;
1605
1606 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1607 goto out_cant_read;
1608
1609 return &trace->syscalls.table[id];
1610
1611out_cant_read:
bb963e16 1612 if (verbose > 0) {
7c304ee0
ACM
1613 fprintf(trace->output, "Problems reading syscall %d", id);
1614 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1615 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1616 fputs(" information\n", trace->output);
1617 }
ba3d7dee
ACM
1618 return NULL;
1619}
1620
bf2575c1
DA
1621static void thread__update_stats(struct thread_trace *ttrace,
1622 int id, struct perf_sample *sample)
1623{
1624 struct int_node *inode;
1625 struct stats *stats;
1626 u64 duration = 0;
1627
1628 inode = intlist__findnew(ttrace->syscall_stats, id);
1629 if (inode == NULL)
1630 return;
1631
1632 stats = inode->priv;
1633 if (stats == NULL) {
1634 stats = malloc(sizeof(struct stats));
1635 if (stats == NULL)
1636 return;
1637 init_stats(stats);
1638 inode->priv = stats;
1639 }
1640
1641 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1642 duration = sample->time - ttrace->entry_time;
1643
1644 update_stats(stats, duration);
1645}
1646
522283fe 1647static int trace__printf_interrupted_entry(struct trace *trace)
e596663e
ACM
1648{
1649 struct thread_trace *ttrace;
e596663e
ACM
1650 size_t printed;
1651
0a6545bd 1652 if (trace->failure_only || trace->current == NULL)
e596663e
ACM
1653 return 0;
1654
1655 ttrace = thread__priv(trace->current);
1656
1657 if (!ttrace->entry_pending)
1658 return 0;
1659
522283fe 1660 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
e596663e
ACM
1661 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1662 ttrace->entry_pending = false;
1663
1664 return printed;
1665}
1666
591421e1
ACM
1667static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
1668 struct perf_sample *sample, struct thread *thread)
1669{
1670 int printed = 0;
1671
1672 if (trace->print_sample) {
1673 double ts = (double)sample->time / NSEC_PER_MSEC;
1674
1675 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
1676 perf_evsel__name(evsel), ts,
1677 thread__comm_str(thread),
1678 sample->pid, sample->tid, sample->cpu);
1679 }
1680
1681 return printed;
1682}
1683
ba3d7dee 1684static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1685 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1686 struct perf_sample *sample)
1687{
752fde44 1688 char *msg;
ba3d7dee 1689 void *args;
752fde44 1690 size_t printed = 0;
2ae3a312 1691 struct thread *thread;
b91fc39f 1692 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1693 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1694 struct thread_trace *ttrace;
1695
1696 if (sc == NULL)
1697 return -1;
ba3d7dee 1698
8fb598e5 1699 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1700 ttrace = thread__trace(thread, trace->output);
2ae3a312 1701 if (ttrace == NULL)
b91fc39f 1702 goto out_put;
ba3d7dee 1703
591421e1
ACM
1704 trace__fprintf_sample(trace, evsel, sample, thread);
1705
77170988 1706 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1707
1708 if (ttrace->entry_str == NULL) {
e4d44e83 1709 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1710 if (!ttrace->entry_str)
b91fc39f 1711 goto out_put;
752fde44
ACM
1712 }
1713
5cf9c84e 1714 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
522283fe 1715 trace__printf_interrupted_entry(trace);
e596663e 1716
752fde44
ACM
1717 ttrace->entry_time = sample->time;
1718 msg = ttrace->entry_str;
e4d44e83 1719 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1720
e4d44e83 1721 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
7a983a0f 1722 args, NULL, 0, trace, thread);
752fde44 1723
5089f20e 1724 if (sc->is_exit) {
0a6545bd 1725 if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
fd2b2975 1726 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1727 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1728 }
7f4f8001 1729 } else {
752fde44 1730 ttrace->entry_pending = true;
7f4f8001
ACM
1731 /* See trace__vfs_getname & trace__sys_exit */
1732 ttrace->filename.pending_open = false;
1733 }
ba3d7dee 1734
f3b623b8
ACM
1735 if (trace->current != thread) {
1736 thread__put(trace->current);
1737 trace->current = thread__get(thread);
1738 }
b91fc39f
ACM
1739 err = 0;
1740out_put:
1741 thread__put(thread);
1742 return err;
ba3d7dee
ACM
1743}
1744
a98392bb
ACM
1745static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evsel,
1746 struct perf_sample *sample)
1747{
a98392bb
ACM
1748 struct thread_trace *ttrace;
1749 struct thread *thread;
f3acd886
ACM
1750 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1751 struct syscall *sc = trace__syscall_info(trace, evsel, id);
a98392bb 1752 char msg[1024];
7a983a0f
ACM
1753 void *args, *augmented_args = NULL;
1754 int augmented_args_size;
a98392bb 1755
a98392bb
ACM
1756 if (sc == NULL)
1757 return -1;
1758
1759 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1760 ttrace = thread__trace(thread, trace->output);
1761 /*
1762 * We need to get ttrace just to make sure it is there when syscall__scnprintf_args()
1763 * and the rest of the beautifiers accessing it via struct syscall_arg touches it.
1764 */
1765 if (ttrace == NULL)
1766 goto out_put;
1767
f3acd886 1768 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
7a983a0f
ACM
1769 augmented_args_size = sample->raw_size - sc->args_size;
1770 if (augmented_args_size > 0)
1771 augmented_args = sample->raw_data + sc->args_size;
1772
1773 syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
a98392bb
ACM
1774 fprintf(trace->output, "%s", msg);
1775 err = 0;
1776out_put:
1777 thread__put(thread);
1778 return err;
1779}
1780
5cf9c84e
ACM
1781static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1782 struct perf_sample *sample,
1783 struct callchain_cursor *cursor)
202ff968
ACM
1784{
1785 struct addr_location al;
3a9e9a47
RB
1786 int max_stack = evsel->attr.sample_max_stack ?
1787 evsel->attr.sample_max_stack :
1788 trace->max_stack;
5cf9c84e
ACM
1789
1790 if (machine__resolve(trace->host, &al, sample) < 0 ||
3a9e9a47 1791 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
5cf9c84e
ACM
1792 return -1;
1793
1794 return 0;
1795}
1796
1797static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1798{
202ff968 1799 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1800 const unsigned int print_opts = EVSEL__PRINT_SYM |
1801 EVSEL__PRINT_DSO |
1802 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1803
d327e60c 1804 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1805}
1806
092bd3cd
HB
/*
 * Translate errno value 'err' into its name for the architecture of the
 * environment 'evsel' comes from.
 */
static const char *errno_to_name(struct perf_evsel *evsel, int err)
{
	const char *arch_name = perf_env__arch(perf_evsel__env(evsel));

	return arch_syscalls__strerrno(arch_name, err);
}
1814
ba3d7dee 1815static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1816 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1817 struct perf_sample *sample)
1818{
2c82c3ad 1819 long ret;
60c907ab 1820 u64 duration = 0;
fd2b2975 1821 bool duration_calculated = false;
2ae3a312 1822 struct thread *thread;
5cf9c84e 1823 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1824 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1825 struct thread_trace *ttrace;
1826
1827 if (sc == NULL)
1828 return -1;
ba3d7dee 1829
8fb598e5 1830 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1831 ttrace = thread__trace(thread, trace->output);
2ae3a312 1832 if (ttrace == NULL)
b91fc39f 1833 goto out_put;
ba3d7dee 1834
591421e1
ACM
1835 trace__fprintf_sample(trace, evsel, sample, thread);
1836
bf2575c1
DA
1837 if (trace->summary)
1838 thread__update_stats(ttrace, id, sample);
1839
77170988 1840 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1841
6a648b53 1842 if (sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1843 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1844 ttrace->filename.pending_open = false;
c522739d
ACM
1845 ++trace->stats.vfs_getname;
1846 }
1847
ae9ed035 1848 if (ttrace->entry_time) {
60c907ab 1849 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1850 if (trace__filter_duration(trace, duration))
1851 goto out;
fd2b2975 1852 duration_calculated = true;
ae9ed035
ACM
1853 } else if (trace->duration_filter)
1854 goto out;
60c907ab 1855
5cf9c84e
ACM
1856 if (sample->callchain) {
1857 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1858 if (callchain_ret == 0) {
1859 if (callchain_cursor.nr < trace->min_stack)
1860 goto out;
1861 callchain_ret = 1;
1862 }
1863 }
1864
0a6545bd 1865 if (trace->summary_only || (ret >= 0 && trace->failure_only))
fd2eabaf
DA
1866 goto out;
1867
fd2b2975 1868 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1869
1870 if (ttrace->entry_pending) {
c24ff998 1871 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1872 } else {
c24ff998
ACM
1873 fprintf(trace->output, " ... [");
1874 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1875 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1876 }
1877
da3c9a44 1878 if (sc->fmt == NULL) {
1f63139c
ACM
1879 if (ret < 0)
1880 goto errno_print;
da3c9a44 1881signed_print:
6f8fe61e 1882 fprintf(trace->output, ") = %ld", ret);
1f63139c
ACM
1883 } else if (ret < 0) {
1884errno_print: {
942a91ed 1885 char bf[STRERR_BUFSIZE];
c8b5f2c9 1886 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
092bd3cd 1887 *e = errno_to_name(evsel, -ret);
ba3d7dee 1888
c24ff998 1889 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1f63139c 1890 }
da3c9a44 1891 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1892 fprintf(trace->output, ") = 0 Timeout");
84486caa
ACM
1893 else if (ttrace->ret_scnprintf) {
1894 char bf[1024];
7ee57434
ACM
1895 struct syscall_arg arg = {
1896 .val = ret,
1897 .thread = thread,
1898 .trace = trace,
1899 };
1900 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
84486caa
ACM
1901 ttrace->ret_scnprintf = NULL;
1902 fprintf(trace->output, ") = %s", bf);
1903 } else if (sc->fmt->hexret)
2c82c3ad 1904 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1905 else if (sc->fmt->errpid) {
1906 struct thread *child = machine__find_thread(trace->host, ret, ret);
1907
1908 if (child != NULL) {
1909 fprintf(trace->output, ") = %ld", ret);
1910 if (child->comm_set)
1911 fprintf(trace->output, " (%s)", thread__comm_str(child));
1912 thread__put(child);
1913 }
1914 } else
da3c9a44 1915 goto signed_print;
ba3d7dee 1916
c24ff998 1917 fputc('\n', trace->output);
566a0885 1918
5cf9c84e
ACM
1919 if (callchain_ret > 0)
1920 trace__fprintf_callchain(trace, sample);
1921 else if (callchain_ret < 0)
1922 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1923out:
752fde44 1924 ttrace->entry_pending = false;
b91fc39f
ACM
1925 err = 0;
1926out_put:
1927 thread__put(thread);
1928 return err;
ba3d7dee
ACM
1929}
1930
c522739d 1931static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1932 union perf_event *event __maybe_unused,
c522739d
ACM
1933 struct perf_sample *sample)
1934{
f994592d
ACM
1935 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1936 struct thread_trace *ttrace;
1937 size_t filename_len, entry_str_len, to_move;
1938 ssize_t remaining_space;
1939 char *pos;
7f4f8001 1940 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1941
1942 if (!thread)
1943 goto out;
1944
1945 ttrace = thread__priv(thread);
1946 if (!ttrace)
ef65e96e 1947 goto out_put;
f994592d 1948
7f4f8001 1949 filename_len = strlen(filename);
39f0e7a8 1950 if (filename_len == 0)
ef65e96e 1951 goto out_put;
7f4f8001
ACM
1952
1953 if (ttrace->filename.namelen < filename_len) {
1954 char *f = realloc(ttrace->filename.name, filename_len + 1);
1955
1956 if (f == NULL)
ef65e96e 1957 goto out_put;
7f4f8001
ACM
1958
1959 ttrace->filename.namelen = filename_len;
1960 ttrace->filename.name = f;
1961 }
1962
1963 strcpy(ttrace->filename.name, filename);
1964 ttrace->filename.pending_open = true;
1965
f994592d 1966 if (!ttrace->filename.ptr)
ef65e96e 1967 goto out_put;
f994592d
ACM
1968
1969 entry_str_len = strlen(ttrace->entry_str);
1970 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1971 if (remaining_space <= 0)
ef65e96e 1972 goto out_put;
f994592d 1973
f994592d
ACM
1974 if (filename_len > (size_t)remaining_space) {
1975 filename += filename_len - remaining_space;
1976 filename_len = remaining_space;
1977 }
1978
1979 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1980 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1981 memmove(pos + filename_len, pos, to_move);
1982 memcpy(pos, filename, filename_len);
1983
1984 ttrace->filename.ptr = 0;
1985 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1986out_put:
1987 thread__put(thread);
f994592d 1988out:
c522739d
ACM
1989 return 0;
1990}
1991
1302d88e 1992static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1993 union perf_event *event __maybe_unused,
1302d88e
ACM
1994 struct perf_sample *sample)
1995{
1996 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1997 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1998 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1999 sample->pid,
2000 sample->tid);
c24ff998 2001 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
2002
2003 if (ttrace == NULL)
2004 goto out_dump;
2005
2006 ttrace->runtime_ms += runtime_ms;
2007 trace->runtime_ms += runtime_ms;
ef65e96e 2008out_put:
b91fc39f 2009 thread__put(thread);
1302d88e
ACM
2010 return 0;
2011
2012out_dump:
c24ff998 2013 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
2014 evsel->name,
2015 perf_evsel__strval(evsel, sample, "comm"),
2016 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2017 runtime,
2018 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 2019 goto out_put;
1302d88e
ACM
2020}
2021
923d0c9a
ACM
2022static int bpf_output__printer(enum binary_printer_ops op,
2023 unsigned int val, void *extra __maybe_unused, FILE *fp)
1d6c9407 2024{
1d6c9407
WN
2025 unsigned char ch = (unsigned char)val;
2026
2027 switch (op) {
2028 case BINARY_PRINT_CHAR_DATA:
923d0c9a 2029 return fprintf(fp, "%c", isprint(ch) ? ch : '.');
1d6c9407
WN
2030 case BINARY_PRINT_DATA_BEGIN:
2031 case BINARY_PRINT_LINE_BEGIN:
2032 case BINARY_PRINT_ADDR:
2033 case BINARY_PRINT_NUM_DATA:
2034 case BINARY_PRINT_NUM_PAD:
2035 case BINARY_PRINT_SEP:
2036 case BINARY_PRINT_CHAR_PAD:
2037 case BINARY_PRINT_LINE_END:
2038 case BINARY_PRINT_DATA_END:
2039 default:
2040 break;
2041 }
923d0c9a
ACM
2042
2043 return 0;
1d6c9407
WN
2044}
2045
2046static void bpf_output__fprintf(struct trace *trace,
2047 struct perf_sample *sample)
2048{
923d0c9a
ACM
2049 binary__fprintf(sample->raw_data, sample->raw_size, 8,
2050 bpf_output__printer, NULL, trace->output);
1d6c9407
WN
2051}
2052
14a052df
ACM
2053static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2054 union perf_event *event __maybe_unused,
2055 struct perf_sample *sample)
2056{
7ad35615
ACM
2057 int callchain_ret = 0;
2058
2059 if (sample->callchain) {
2060 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2061 if (callchain_ret == 0) {
2062 if (callchain_cursor.nr < trace->min_stack)
2063 goto out;
2064 callchain_ret = 1;
2065 }
2066 }
2067
522283fe 2068 trace__printf_interrupted_entry(trace);
14a052df 2069 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
2070
2071 if (trace->trace_syscalls)
2072 fprintf(trace->output, "( ): ");
2073
1cdf618f
ACM
2074 if (evsel == trace->syscalls.events.augmented) {
2075 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
2076 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2077
2078 if (sc) {
2079 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2080
2081 if (thread) {
c96f4edc 2082 trace__fprintf_comm_tid(trace, thread, trace->output);
1cdf618f
ACM
2083 fprintf(trace->output, "%s(", sc->name);
2084 trace__fprintf_sys_enter(trace, evsel, sample);
2085 fputc(')', trace->output);
2086 thread__put(thread);
2087 goto newline;
2088 }
2089 }
2090
2091 /*
2092 * XXX: Not having the associated syscall info or not finding/adding
2093 * the thread should never happen, but if it does...
2094 * fall thru and print it as a bpf_output event.
2095 */
2096 }
2097
0808921a 2098 fprintf(trace->output, "%s:", evsel->name);
14a052df 2099
1d6c9407 2100 if (perf_evsel__is_bpf_output(evsel)) {
1cdf618f 2101 bpf_output__fprintf(trace, sample);
1d6c9407 2102 } else if (evsel->tp_format) {
a98392bb
ACM
2103 if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
2104 trace__fprintf_sys_enter(trace, evsel, sample)) {
2105 event_format__fprintf(evsel->tp_format, sample->cpu,
2106 sample->raw_data, sample->raw_size,
2107 trace->output);
2108 }
14a052df
ACM
2109 }
2110
1cdf618f 2111newline:
51125a29 2112 fprintf(trace->output, "\n");
202ff968 2113
7ad35615
ACM
2114 if (callchain_ret > 0)
2115 trace__fprintf_callchain(trace, sample);
2116 else if (callchain_ret < 0)
2117 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2118out:
14a052df
ACM
2119 return 0;
2120}
2121
598d02c5
SF
2122static void print_location(FILE *f, struct perf_sample *sample,
2123 struct addr_location *al,
2124 bool print_dso, bool print_sym)
2125{
2126
bb963e16 2127 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
2128 fprintf(f, "%s@", al->map->dso->long_name);
2129
bb963e16 2130 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 2131 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
2132 al->addr - al->sym->start);
2133 else if (al->map)
4414a3c5 2134 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 2135 else
4414a3c5 2136 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
2137}
2138
2139static int trace__pgfault(struct trace *trace,
2140 struct perf_evsel *evsel,
473398a2 2141 union perf_event *event __maybe_unused,
598d02c5
SF
2142 struct perf_sample *sample)
2143{
2144 struct thread *thread;
598d02c5
SF
2145 struct addr_location al;
2146 char map_type = 'd';
a2ea67d7 2147 struct thread_trace *ttrace;
b91fc39f 2148 int err = -1;
1df54290 2149 int callchain_ret = 0;
598d02c5
SF
2150
2151 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
2152
2153 if (sample->callchain) {
2154 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2155 if (callchain_ret == 0) {
2156 if (callchain_cursor.nr < trace->min_stack)
2157 goto out_put;
2158 callchain_ret = 1;
2159 }
2160 }
2161
a2ea67d7
SF
2162 ttrace = thread__trace(thread, trace->output);
2163 if (ttrace == NULL)
b91fc39f 2164 goto out_put;
a2ea67d7
SF
2165
2166 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2167 ttrace->pfmaj++;
2168 else
2169 ttrace->pfmin++;
2170
2171 if (trace->summary_only)
b91fc39f 2172 goto out;
598d02c5 2173
4546263d 2174 thread__find_symbol(thread, sample->cpumode, sample->ip, &al);
598d02c5 2175
fd2b2975 2176 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
2177
2178 fprintf(trace->output, "%sfault [",
2179 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2180 "maj" : "min");
2181
2182 print_location(trace->output, sample, &al, false, true);
2183
2184 fprintf(trace->output, "] => ");
2185
117d3c24 2186 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
598d02c5
SF
2187
2188 if (!al.map) {
4546263d 2189 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
598d02c5
SF
2190
2191 if (al.map)
2192 map_type = 'x';
2193 else
2194 map_type = '?';
2195 }
2196
2197 print_location(trace->output, sample, &al, true, false);
2198
2199 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 2200
1df54290
ACM
2201 if (callchain_ret > 0)
2202 trace__fprintf_callchain(trace, sample);
2203 else if (callchain_ret < 0)
2204 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
2205out:
2206 err = 0;
2207out_put:
2208 thread__put(thread);
2209 return err;
598d02c5
SF
2210}
2211
e6001980 2212static void trace__set_base_time(struct trace *trace,
8a07a809 2213 struct perf_evsel *evsel,
e6001980
ACM
2214 struct perf_sample *sample)
2215{
8a07a809
ACM
2216 /*
2217 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2218 * and don't use sample->time unconditionally, we may end up having
2219 * some other event in the future without PERF_SAMPLE_TIME for good
2220 * reason, i.e. we may not be interested in its timestamps, just in
2221 * it taking place, picking some piece of information when it
2222 * appears in our event stream (vfs_getname comes to mind).
2223 */
2224 if (trace->base_time == 0 && !trace->full_time &&
2225 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
2226 trace->base_time = sample->time;
2227}
2228
6810fc91 2229static int trace__process_sample(struct perf_tool *tool,
0c82adcf 2230 union perf_event *event,
6810fc91
DA
2231 struct perf_sample *sample,
2232 struct perf_evsel *evsel,
2233 struct machine *machine __maybe_unused)
2234{
2235 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 2236 struct thread *thread;
6810fc91
DA
2237 int err = 0;
2238
744a9719 2239 tracepoint_handler handler = evsel->handler;
6810fc91 2240
aa07df6e
DA
2241 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2242 if (thread && thread__is_filtered(thread))
ef65e96e 2243 goto out;
bdc89661 2244
e6001980 2245 trace__set_base_time(trace, evsel, sample);
6810fc91 2246
3160565f
DA
2247 if (handler) {
2248 ++trace->nr_events;
0c82adcf 2249 handler(trace, evsel, event, sample);
3160565f 2250 }
ef65e96e
ACM
2251out:
2252 thread__put(thread);
6810fc91
DA
2253 return err;
2254}
2255
1e28fe0a 2256static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
2257{
2258 unsigned int rec_argc, i, j;
2259 const char **rec_argv;
2260 const char * const record_args[] = {
2261 "record",
2262 "-R",
2263 "-m", "1024",
2264 "-c", "1",
5e2485b1
DA
2265 };
2266
1e28fe0a
SF
2267 const char * const sc_args[] = { "-e", };
2268 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2269 const char * const majpf_args[] = { "-e", "major-faults" };
2270 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2271 const char * const minpf_args[] = { "-e", "minor-faults" };
2272 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2273
9aca7f17 2274 /* +1 is for the event string below */
1e28fe0a
SF
2275 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2276 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
2277 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2278
2279 if (rec_argv == NULL)
2280 return -ENOMEM;
2281
1e28fe0a 2282 j = 0;
5e2485b1 2283 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
2284 rec_argv[j++] = record_args[i];
2285
e281a960
SF
2286 if (trace->trace_syscalls) {
2287 for (i = 0; i < sc_args_nr; i++)
2288 rec_argv[j++] = sc_args[i];
2289
2290 /* event string may be different for older kernels - e.g., RHEL6 */
2291 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2292 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2293 else if (is_valid_tracepoint("syscalls:sys_enter"))
2294 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2295 else {
2296 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
c896f85a 2297 free(rec_argv);
e281a960
SF
2298 return -1;
2299 }
9aca7f17 2300 }
9aca7f17 2301
1e28fe0a
SF
2302 if (trace->trace_pgfaults & TRACE_PFMAJ)
2303 for (i = 0; i < majpf_args_nr; i++)
2304 rec_argv[j++] = majpf_args[i];
2305
2306 if (trace->trace_pgfaults & TRACE_PFMIN)
2307 for (i = 0; i < minpf_args_nr; i++)
2308 rec_argv[j++] = minpf_args[i];
2309
2310 for (i = 0; i < (unsigned int)argc; i++)
2311 rec_argv[j++] = argv[i];
5e2485b1 2312
b0ad8ea6 2313 return cmd_record(j, rec_argv);
5e2485b1
DA
2314}
2315
bf2575c1
DA
2316static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2317
08c98776 2318static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2319{
ef503831 2320 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2321
2322 if (IS_ERR(evsel))
08c98776 2323 return false;
c522739d
ACM
2324
2325 if (perf_evsel__field(evsel, "pathname") == NULL) {
2326 perf_evsel__delete(evsel);
08c98776 2327 return false;
c522739d
ACM
2328 }
2329
744a9719 2330 evsel->handler = trace__vfs_getname;
c522739d 2331 perf_evlist__add(evlist, evsel);
08c98776 2332 return true;
c522739d
ACM
2333}
2334
0ae537cb 2335static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2336{
2337 struct perf_evsel *evsel;
2338 struct perf_event_attr attr = {
2339 .type = PERF_TYPE_SOFTWARE,
2340 .mmap_data = 1,
598d02c5
SF
2341 };
2342
2343 attr.config = config;
0524798c 2344 attr.sample_period = 1;
598d02c5
SF
2345
2346 event_attr_init(&attr);
2347
2348 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2349 if (evsel)
2350 evsel->handler = trace__pgfault;
598d02c5 2351
0ae537cb 2352 return evsel;
598d02c5
SF
2353}
2354
ddbb1b13
ACM
2355static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2356{
2357 const u32 type = event->header.type;
2358 struct perf_evsel *evsel;
2359
ddbb1b13
ACM
2360 if (type != PERF_RECORD_SAMPLE) {
2361 trace__process_event(trace, trace->host, event, sample);
2362 return;
2363 }
2364
2365 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2366 if (evsel == NULL) {
2367 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2368 return;
2369 }
2370
e6001980
ACM
2371 trace__set_base_time(trace, evsel, sample);
2372
ddbb1b13
ACM
2373 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2374 sample->raw_data == NULL) {
2375 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2376 perf_evsel__name(evsel), sample->tid,
2377 sample->cpu, sample->raw_size);
2378 } else {
2379 tracepoint_handler handler = evsel->handler;
2380 handler(trace, evsel, event, sample);
2381 }
2382}
2383
c27366f0
ACM
2384static int trace__add_syscall_newtp(struct trace *trace)
2385{
2386 int ret = -1;
2387 struct perf_evlist *evlist = trace->evlist;
2388 struct perf_evsel *sys_enter, *sys_exit;
2389
63f11c80 2390 sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
c27366f0
ACM
2391 if (sys_enter == NULL)
2392 goto out;
2393
2394 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2395 goto out_delete_sys_enter;
2396
63f11c80 2397 sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
c27366f0
ACM
2398 if (sys_exit == NULL)
2399 goto out_delete_sys_enter;
2400
2401 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2402 goto out_delete_sys_exit;
2403
08e26396
ACM
2404 perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
2405 perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
2406
c27366f0
ACM
2407 perf_evlist__add(evlist, sys_enter);
2408 perf_evlist__add(evlist, sys_exit);
2409
2ddd5c04 2410 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2411 /*
2412 * We're interested only in the user space callchain
2413 * leading to the syscall, allow overriding that for
2414 * debugging reasons using --kernel_syscall_callchains
2415 */
2416 sys_exit->attr.exclude_callchain_kernel = 1;
2417 }
2418
8b3ce757
ACM
2419 trace->syscalls.events.sys_enter = sys_enter;
2420 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2421
2422 ret = 0;
2423out:
2424 return ret;
2425
2426out_delete_sys_exit:
2427 perf_evsel__delete_priv(sys_exit);
2428out_delete_sys_enter:
2429 perf_evsel__delete_priv(sys_enter);
2430 goto out;
2431}
2432
19867b61
ACM
2433static int trace__set_ev_qualifier_filter(struct trace *trace)
2434{
2435 int err = -1;
b15d0a4c 2436 struct perf_evsel *sys_exit;
19867b61
ACM
2437 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2438 trace->ev_qualifier_ids.nr,
2439 trace->ev_qualifier_ids.entries);
2440
2441 if (filter == NULL)
2442 goto out_enomem;
2443
3541c034
MP
2444 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2445 filter)) {
b15d0a4c 2446 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2447 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2448 }
19867b61
ACM
2449
2450 free(filter);
2451out:
2452 return err;
2453out_enomem:
2454 errno = ENOMEM;
2455 goto out;
2456}
c27366f0 2457
dd1a5037
ACM
2458static int trace__set_filter_loop_pids(struct trace *trace)
2459{
082ab9a1 2460 unsigned int nr = 1;
dd1a5037
ACM
2461 pid_t pids[32] = {
2462 getpid(),
2463 };
082ab9a1
ACM
2464 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
2465
2466 while (thread && nr < ARRAY_SIZE(pids)) {
2467 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
2468
2469 if (parent == NULL)
2470 break;
2471
2472 if (!strcmp(thread__comm_str(parent), "sshd")) {
2473 pids[nr++] = parent->tid;
2474 break;
2475 }
2476 thread = parent;
2477 }
dd1a5037
ACM
2478
2479 return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
2480}
2481
f15eb531 2482static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2483{
14a052df 2484 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2485 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2486 int err = -1, i;
2487 unsigned long before;
f15eb531 2488 const bool forks = argc > 0;
46fb3c21 2489 bool draining = false;
514f1c67 2490
75b757ca
ACM
2491 trace->live = true;
2492
c27366f0 2493 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2494 goto out_error_raw_syscalls;
514f1c67 2495
e281a960 2496 if (trace->trace_syscalls)
08c98776 2497 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2498
0ae537cb
ACM
2499 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2500 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2501 if (pgfault_maj == NULL)
2502 goto out_error_mem;
08e26396 2503 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
0ae537cb 2504 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2505 }
598d02c5 2506
0ae537cb
ACM
2507 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2508 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2509 if (pgfault_min == NULL)
2510 goto out_error_mem;
08e26396 2511 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
0ae537cb
ACM
2512 perf_evlist__add(evlist, pgfault_min);
2513 }
598d02c5 2514
1302d88e 2515 if (trace->sched &&
2cc990ba
ACM
2516 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2517 trace__sched_stat_runtime))
2518 goto out_error_sched_stat_runtime;
1302d88e 2519
9ea42ba4
ACM
2520 /*
2521 * If a global cgroup was set, apply it to all the events without an
2522 * explicit cgroup. I.e.:
2523 *
2524 * trace -G A -e sched:*switch
2525 *
2526 * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
2527 * _and_ sched:sched_switch to the 'A' cgroup, while:
2528 *
2529 * trace -e sched:*switch -G A
2530 *
2531 * will only set the sched:sched_switch event to the 'A' cgroup, all the
2532 * other events (raw_syscalls:sys_{enter,exit}, etc are left "without"
2533 * a cgroup (on the root cgroup, sys wide, etc).
2534 *
2535 * Multiple cgroups:
2536 *
2537 * trace -G A -e sched:*switch -G B
2538 *
2539 * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
2540 * to the 'B' cgroup.
2541 *
2542 * evlist__set_default_cgroup() grabs a reference of the passed cgroup
2543 * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL.
2544 */
2545 if (trace->cgroup)
2546 evlist__set_default_cgroup(trace->evlist, trace->cgroup);
2547
514f1c67
ACM
2548 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2549 if (err < 0) {
c24ff998 2550 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2551 goto out_delete_evlist;
2552 }
2553
752fde44
ACM
2554 err = trace__symbols_init(trace, evlist);
2555 if (err < 0) {
c24ff998 2556 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2557 goto out_delete_evlist;
752fde44
ACM
2558 }
2559
75d50117 2560 perf_evlist__config(evlist, &trace->opts, &callchain_param);
fde54b78 2561
f15eb531
NK
2562 signal(SIGCHLD, sig_handler);
2563 signal(SIGINT, sig_handler);
2564
2565 if (forks) {
6ef73ec4 2566 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2567 argv, false, NULL);
f15eb531 2568 if (err < 0) {
c24ff998 2569 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2570 goto out_delete_evlist;
f15eb531
NK
2571 }
2572 }
2573
514f1c67 2574 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2575 if (err < 0)
2576 goto out_error_open;
514f1c67 2577
ba504235
WN
2578 err = bpf__apply_obj_config();
2579 if (err) {
2580 char errbuf[BUFSIZ];
2581
2582 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2583 pr_err("ERROR: Apply config to BPF failed: %s\n",
2584 errbuf);
2585 goto out_error_open;
2586 }
2587
241b057c
ACM
2588 /*
2589 * Better not use !target__has_task() here because we need to cover the
2590 * case where no threads were specified in the command line, but a
2591 * workload was, and in that case we will fill in the thread_map when
2592 * we fork the workload in perf_evlist__prepare_workload.
2593 */
f078c385
ACM
2594 if (trace->filter_pids.nr > 0)
2595 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2596 else if (thread_map__pid(evlist->threads, 0) == -1)
dd1a5037 2597 err = trace__set_filter_loop_pids(trace);
f078c385 2598
94ad89bc
ACM
2599 if (err < 0)
2600 goto out_error_mem;
2601
19867b61
ACM
2602 if (trace->ev_qualifier_ids.nr > 0) {
2603 err = trace__set_ev_qualifier_filter(trace);
2604 if (err < 0)
2605 goto out_errno;
19867b61 2606
2e5e5f87
ACM
2607 pr_debug("event qualifier tracepoint filter: %s\n",
2608 trace->syscalls.events.sys_exit->filter);
2609 }
19867b61 2610
94ad89bc
ACM
2611 err = perf_evlist__apply_filters(evlist, &evsel);
2612 if (err < 0)
2613 goto out_error_apply_filters;
241b057c 2614
f74b9d3a 2615 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
e09b18d4
ACM
2616 if (err < 0)
2617 goto out_error_mmap;
514f1c67 2618
e36b7821 2619 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2620 perf_evlist__enable(evlist);
2621
f15eb531
NK
2622 if (forks)
2623 perf_evlist__start_workload(evlist);
2624
e36b7821
AB
2625 if (trace->opts.initial_delay) {
2626 usleep(trace->opts.initial_delay * 1000);
2627 perf_evlist__enable(evlist);
2628 }
2629
e13798c7 2630 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2631 evlist->threads->nr > 1 ||
2632 perf_evlist__first(evlist)->attr.inherit;
bd3dda9a
ACM
2633
2634 /*
2635 * Now that we already used evsel->attr to ask the kernel to setup the
2636 * events, lets reuse evsel->attr.sample_max_stack as the limit in
2637 * trace__resolve_callchain(), allowing per-event max-stack settings
2638 * to override an explicitely set --max-stack global setting.
2639 */
2640 evlist__for_each_entry(evlist, evsel) {
27de9b2b 2641 if (evsel__has_callchain(evsel) &&
bd3dda9a
ACM
2642 evsel->attr.sample_max_stack == 0)
2643 evsel->attr.sample_max_stack = trace->max_stack;
2644 }
514f1c67 2645again:
efd5745e 2646 before = trace->nr_events;
514f1c67
ACM
2647
2648 for (i = 0; i < evlist->nr_mmaps; i++) {
2649 union perf_event *event;
d7f55c62 2650 struct perf_mmap *md;
514f1c67 2651
d7f55c62 2652 md = &evlist->mmap[i];
b9bae2c8 2653 if (perf_mmap__read_init(md) < 0)
d7f55c62
KL
2654 continue;
2655
0019dc87 2656 while ((event = perf_mmap__read_event(md)) != NULL) {
514f1c67 2657 struct perf_sample sample;
514f1c67 2658
efd5745e 2659 ++trace->nr_events;
514f1c67 2660
514f1c67
ACM
2661 err = perf_evlist__parse_sample(evlist, event, &sample);
2662 if (err) {
c24ff998 2663 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2664 goto next_event;
514f1c67
ACM
2665 }
2666
ddbb1b13 2667 trace__handle_event(trace, event, &sample);
8e50d384 2668next_event:
d6ace3df 2669 perf_mmap__consume(md);
20c5f10e 2670
ba209f85
ACM
2671 if (interrupted)
2672 goto out_disable;
02ac5421
ACM
2673
2674 if (done && !draining) {
2675 perf_evlist__disable(evlist);
2676 draining = true;
2677 }
514f1c67 2678 }
d7f55c62 2679 perf_mmap__read_done(md);
514f1c67
ACM
2680 }
2681
efd5745e 2682 if (trace->nr_events == before) {
ba209f85 2683 int timeout = done ? 100 : -1;
f15eb531 2684
46fb3c21
ACM
2685 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2686 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2687 draining = true;
2688
ba209f85 2689 goto again;
46fb3c21 2690 }
ba209f85
ACM
2691 } else {
2692 goto again;
f15eb531
NK
2693 }
2694
ba209f85 2695out_disable:
f3b623b8
ACM
2696 thread__zput(trace->current);
2697
ba209f85 2698 perf_evlist__disable(evlist);
514f1c67 2699
c522739d
ACM
2700 if (!err) {
2701 if (trace->summary)
2702 trace__fprintf_thread_summary(trace, trace->output);
2703
2704 if (trace->show_tool_stats) {
2705 fprintf(trace->output, "Stats:\n "
2706 " vfs_getname : %" PRIu64 "\n"
2707 " proc_getname: %" PRIu64 "\n",
2708 trace->stats.vfs_getname,
2709 trace->stats.proc_getname);
2710 }
2711 }
bf2575c1 2712
514f1c67 2713out_delete_evlist:
33974a41
AV
2714 trace__symbols__exit(trace);
2715
514f1c67 2716 perf_evlist__delete(evlist);
9ea42ba4 2717 cgroup__put(trace->cgroup);
14a052df 2718 trace->evlist = NULL;
75b757ca 2719 trace->live = false;
514f1c67 2720 return err;
6ef068cb
ACM
2721{
2722 char errbuf[BUFSIZ];
a8f23d8f 2723
2cc990ba 2724out_error_sched_stat_runtime:
988bdb31 2725 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2726 goto out_error;
2727
801c67b0 2728out_error_raw_syscalls:
988bdb31 2729 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2730 goto out_error;
2731
e09b18d4
ACM
2732out_error_mmap:
2733 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2734 goto out_error;
2735
a8f23d8f
ACM
2736out_error_open:
2737 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2738
2739out_error:
6ef068cb 2740 fprintf(trace->output, "%s\n", errbuf);
87f91868 2741 goto out_delete_evlist;
94ad89bc
ACM
2742
2743out_error_apply_filters:
2744 fprintf(trace->output,
2745 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2746 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2747 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2748 goto out_delete_evlist;
514f1c67 2749}
5ed08dae
ACM
2750out_error_mem:
2751 fprintf(trace->output, "Not enough memory to run!\n");
2752 goto out_delete_evlist;
19867b61
ACM
2753
2754out_errno:
2755 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2756 goto out_delete_evlist;
a8f23d8f 2757}
514f1c67 2758
6810fc91
DA
2759static int trace__replay(struct trace *trace)
2760{
2761 const struct perf_evsel_str_handler handlers[] = {
c522739d 2762 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2763 };
8ceb41d7 2764 struct perf_data data = {
eae8ad80
JO
2765 .file = {
2766 .path = input_name,
2767 },
2768 .mode = PERF_DATA_MODE_READ,
2769 .force = trace->force,
f5fc1412 2770 };
6810fc91 2771 struct perf_session *session;
003824e8 2772 struct perf_evsel *evsel;
6810fc91
DA
2773 int err = -1;
2774
2775 trace->tool.sample = trace__process_sample;
2776 trace->tool.mmap = perf_event__process_mmap;
384c671e 2777 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2778 trace->tool.comm = perf_event__process_comm;
2779 trace->tool.exit = perf_event__process_exit;
2780 trace->tool.fork = perf_event__process_fork;
2781 trace->tool.attr = perf_event__process_attr;
f3b3614a 2782 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2783 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2784 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2785
0a8cb85c 2786 trace->tool.ordered_events = true;
6810fc91
DA
2787 trace->tool.ordering_requires_timestamps = true;
2788
2789 /* add tid to output */
2790 trace->multiple_threads = true;
2791
8ceb41d7 2792 session = perf_session__new(&data, false, &trace->tool);
6810fc91 2793 if (session == NULL)
52e02834 2794 return -1;
6810fc91 2795
aa07df6e
DA
2796 if (trace->opts.target.pid)
2797 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2798
2799 if (trace->opts.target.tid)
2800 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2801
0a7e6d1b 2802 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2803 goto out;
2804
8fb598e5
DA
2805 trace->host = &session->machines.host;
2806
6810fc91
DA
2807 err = perf_session__set_tracepoints_handlers(session, handlers);
2808 if (err)
2809 goto out;
2810
003824e8
NK
2811 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2812 "raw_syscalls:sys_enter");
9aca7f17
DA
2813 /* older kernels have syscalls tp versus raw_syscalls */
2814 if (evsel == NULL)
2815 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2816 "syscalls:sys_enter");
003824e8 2817
e281a960 2818 if (evsel &&
63f11c80 2819 (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
e281a960 2820 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2821 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2822 goto out;
2823 }
2824
2825 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2826 "raw_syscalls:sys_exit");
9aca7f17
DA
2827 if (evsel == NULL)
2828 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2829 "syscalls:sys_exit");
e281a960 2830 if (evsel &&
63f11c80 2831 (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
e281a960 2832 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2833 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2834 goto out;
2835 }
2836
e5cadb93 2837 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2838 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2839 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2840 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2841 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2842 evsel->handler = trace__pgfault;
2843 }
2844
6810fc91
DA
2845 setup_pager();
2846
b7b61cbe 2847 err = perf_session__process_events(session);
6810fc91
DA
2848 if (err)
2849 pr_err("Failed to process events, error %d", err);
2850
bf2575c1
DA
2851 else if (trace->summary)
2852 trace__fprintf_thread_summary(trace, trace->output);
2853
6810fc91
DA
2854out:
2855 perf_session__delete(session);
2856
2857 return err;
2858}
2859
1302d88e
ACM
/* Print the banner that precedes the per thread summary; returns fprintf()'s result. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2868
b535d523
ACM
2869DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2870 struct stats *stats;
2871 double msecs;
2872 int syscall;
2873)
2874{
2875 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2876 struct stats *stats = source->priv;
2877
2878 entry->syscall = source->i;
2879 entry->stats = stats;
2880 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2881}
2882
bf2575c1
DA
2883static size_t thread__dump_stats(struct thread_trace *ttrace,
2884 struct trace *trace, FILE *fp)
2885{
bf2575c1
DA
2886 size_t printed = 0;
2887 struct syscall *sc;
b535d523
ACM
2888 struct rb_node *nd;
2889 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2890
b535d523 2891 if (syscall_stats == NULL)
bf2575c1
DA
2892 return 0;
2893
2894 printed += fprintf(fp, "\n");
2895
834fd46d
MW
2896 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2897 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2898 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2899
98a91837 2900 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2901 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2902 if (stats) {
2903 double min = (double)(stats->min) / NSEC_PER_MSEC;
2904 double max = (double)(stats->max) / NSEC_PER_MSEC;
2905 double avg = avg_stats(stats);
2906 double pct;
2907 u64 n = (u64) stats->n;
2908
2909 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2910 avg /= NSEC_PER_MSEC;
2911
b535d523 2912 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2913 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2914 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2915 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2916 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2917 }
bf2575c1
DA
2918 }
2919
b535d523 2920 resort_rb__delete(syscall_stats);
bf2575c1 2921 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2922
2923 return printed;
2924}
2925
96c14451 2926static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2927{
96c14451 2928 size_t printed = 0;
89dceb22 2929 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2930 double ratio;
2931
2932 if (ttrace == NULL)
2933 return 0;
2934
2935 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2936
15e65c69 2937 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2938 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2939 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2940 if (ttrace->pfmaj)
2941 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2942 if (ttrace->pfmin)
2943 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2944 if (trace->sched)
2945 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2946 else if (fputc('\n', fp) != EOF)
2947 ++printed;
2948
bf2575c1 2949 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2950
96c14451
ACM
2951 return printed;
2952}
896cbb56 2953
96c14451
ACM
2954static unsigned long thread__nr_events(struct thread_trace *ttrace)
2955{
2956 return ttrace ? ttrace->nr_events : 0;
2957}
2958
2959DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2960 struct thread *thread;
2961)
2962{
2963 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2964}
2965
1302d88e
ACM
2966static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2967{
96c14451
ACM
2968 size_t printed = trace__fprintf_threads_header(fp);
2969 struct rb_node *nd;
91e467bc 2970 int i;
1302d88e 2971
91e467bc
KL
2972 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
2973 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
96c14451 2974
91e467bc
KL
2975 if (threads == NULL) {
2976 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2977 return 0;
2978 }
896cbb56 2979
91e467bc
KL
2980 resort_rb__for_each_entry(nd, threads)
2981 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
96c14451 2982
91e467bc
KL
2983 resort_rb__delete(threads);
2984 }
96c14451 2985 return printed;
1302d88e
ACM
2986}
2987
ae9ed035
ACM
2988static int trace__set_duration(const struct option *opt, const char *str,
2989 int unset __maybe_unused)
2990{
2991 struct trace *trace = opt->value;
2992
2993 trace->duration_filter = atof(str);
2994 return 0;
2995}
2996
f078c385
ACM
2997static int trace__set_filter_pids(const struct option *opt, const char *str,
2998 int unset __maybe_unused)
2999{
3000 int ret = -1;
3001 size_t i;
3002 struct trace *trace = opt->value;
3003 /*
3004 * FIXME: introduce a intarray class, plain parse csv and create a
3005 * { int nr, int entries[] } struct...
3006 */
3007 struct intlist *list = intlist__new(str);
3008
3009 if (list == NULL)
3010 return -1;
3011
3012 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
3013 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
3014
3015 if (trace->filter_pids.entries == NULL)
3016 goto out;
3017
3018 trace->filter_pids.entries[0] = getpid();
3019
3020 for (i = 1; i < trace->filter_pids.nr; ++i)
3021 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
3022
3023 intlist__delete(list);
3024 ret = 0;
3025out:
3026 return ret;
3027}
3028
c24ff998
ACM
3029static int trace__open_output(struct trace *trace, const char *filename)
3030{
3031 struct stat st;
3032
3033 if (!stat(filename, &st) && st.st_size) {
3034 char oldname[PATH_MAX];
3035
3036 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
3037 unlink(oldname);
3038 rename(filename, oldname);
3039 }
3040
3041 trace->output = fopen(filename, "w");
3042
3043 return trace->output == NULL ? -errno : 0;
3044}
3045
598d02c5
SF
3046static int parse_pagefaults(const struct option *opt, const char *str,
3047 int unset __maybe_unused)
3048{
3049 int *trace_pgfaults = opt->value;
3050
3051 if (strcmp(str, "all") == 0)
3052 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3053 else if (strcmp(str, "maj") == 0)
3054 *trace_pgfaults |= TRACE_PFMAJ;
3055 else if (strcmp(str, "min") == 0)
3056 *trace_pgfaults |= TRACE_PFMIN;
3057 else
3058 return -1;
3059
3060 return 0;
3061}
3062
14a052df
ACM
3063static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3064{
3065 struct perf_evsel *evsel;
3066
e5cadb93 3067 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
3068 evsel->handler = handler;
3069}
3070
d32855fa
ACM
3071static int evlist__set_syscall_tp_fields(struct perf_evlist *evlist)
3072{
3073 struct perf_evsel *evsel;
3074
3075 evlist__for_each_entry(evlist, evsel) {
3076 if (evsel->priv || !evsel->tp_format)
3077 continue;
3078
3079 if (strcmp(evsel->tp_format->system, "syscalls"))
3080 continue;
3081
3082 if (perf_evsel__init_syscall_tp(evsel))
3083 return -1;
3084
3085 if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
3086 struct syscall_tp *sc = evsel->priv;
3087
3088 if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
3089 return -1;
3090 } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
3091 struct syscall_tp *sc = evsel->priv;
3092
3093 if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
3094 return -1;
3095 }
3096 }
3097
3098 return 0;
3099}
3100
017037ff
ACM
3101/*
3102 * XXX: Hackish, just splitting the combined -e+--event (syscalls
3103 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
3104 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
3105 *
3106 * It'd be better to introduce a parse_options() variant that would return a
3107 * list with the terms it didn't match to an event...
3108 */
3109static int trace__parse_events_option(const struct option *opt, const char *str,
3110 int unset __maybe_unused)
3111{
3112 struct trace *trace = (struct trace *)opt->value;
3113 const char *s = str;
3114 char *sep = NULL, *lists[2] = { NULL, NULL, };
27702bcf 3115 int len = strlen(str) + 1, err = -1, list, idx;
017037ff
ACM
3116 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
3117 char group_name[PATH_MAX];
3118
3119 if (strace_groups_dir == NULL)
3120 return -1;
3121
3122 if (*s == '!') {
3123 ++s;
3124 trace->not_ev_qualifier = true;
3125 }
3126
3127 while (1) {
3128 if ((sep = strchr(s, ',')) != NULL)
3129 *sep = '\0';
3130
3131 list = 0;
27702bcf
ACM
3132 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
3133 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
017037ff
ACM
3134 list = 1;
3135 } else {
3136 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
3137 if (access(group_name, R_OK) == 0)
3138 list = 1;
3139 }
3140
3141 if (lists[list]) {
3142 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
3143 } else {
3144 lists[list] = malloc(len);
3145 if (lists[list] == NULL)
3146 goto out;
3147 strcpy(lists[list], s);
3148 }
3149
3150 if (!sep)
3151 break;
3152
3153 *sep = ',';
3154 s = sep + 1;
3155 }
3156
3157 if (lists[1] != NULL) {
3158 struct strlist_config slist_config = {
3159 .dirname = strace_groups_dir,
3160 };
3161
3162 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
3163 if (trace->ev_qualifier == NULL) {
3164 fputs("Not enough memory to parse event qualifier", trace->output);
3165 goto out;
3166 }
3167
3168 if (trace__validate_ev_qualifier(trace))
3169 goto out;
b912885a 3170 trace->trace_syscalls = true;
017037ff
ACM
3171 }
3172
3173 err = 0;
3174
3175 if (lists[0]) {
3176 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
3177 "event selector. use 'perf list' to list available events",
3178 parse_events_option);
3179 err = parse_events_option(&o, lists[0], 0);
3180 }
3181out:
3182 if (sep)
3183 *sep = ',';
3184
3185 return err;
3186}
3187
9ea42ba4
ACM
3188static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
3189{
3190 struct trace *trace = opt->value;
3191
3192 if (!list_empty(&trace->evlist->entries))
3193 return parse_cgroups(opt, str, unset);
3194
3195 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
3196
3197 return 0;
3198}
3199
/*
 * Entry point of the 'trace' builtin: parses the command line, sets up the
 * evlist, the syscall table and the BPF output events, validates the target
 * and then dispatches to trace__record(), trace__replay() (when an input
 * file was given) or trace__run().
 *
 * Returns the result of the dispatched function, or a negative value when
 * the setup fails before that.
 */
b0ad8ea6 3200int cmd_trace(int argc, const char **argv)
514f1c67 3201{
6fdd9cb7 3202 const char *trace_usage[] = {
f15eb531
NK
3203 "perf trace [<options>] [<command>]",
3204 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
3205 "perf trace record [<options>] [<command>]",
3206 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
3207 NULL
3208 };
/*
 * Defaults. The UINT_MAX values of opts.mmap_pages and max_stack are
 * compared against after option parsing to find out whether the user set
 * them.
 */
3209 struct trace trace = {
514f1c67
ACM
3210 .syscalls = {
3211 . max = -1,
3212 },
3213 .opts = {
3214 .target = {
3215 .uid = UINT_MAX,
3216 .uses_mmap = true,
3217 },
3218 .user_freq = UINT_MAX,
3219 .user_interval = ULLONG_MAX,
509051ea 3220 .no_buffering = true,
38d5447d 3221 .mmap_pages = UINT_MAX,
9d9cad76 3222 .proc_map_timeout = 500,
514f1c67 3223 },
007d66a0 3224 .output = stderr,
50c95cbd 3225 .show_comm = true,
b912885a 3226 .trace_syscalls = false,
44621819 3227 .kernel_syscallchains = false,
05614993 3228 .max_stack = UINT_MAX,
514f1c67 3229 };
c24ff998 3230 const char *output_name = NULL;
514f1c67 3231 const struct option trace_options[] = {
017037ff
ACM
3232 OPT_CALLBACK('e', "event", &trace, "event",
3233 "event/syscall selector. use 'perf list' to list available events",
3234 trace__parse_events_option),
50c95cbd
ACM
3235 OPT_BOOLEAN(0, "comm", &trace.show_comm,
3236 "show the thread COMM next to its id"),
c522739d 3237 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
3238 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
3239 trace__parse_events_option),
c24ff998 3240 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 3241 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
3242 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3243 "trace events on existing process id"),
ac9be8ee 3244 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 3245 "trace events on existing thread id"),
fa0e4ffe
ACM
3246 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3247 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 3248 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 3249 "system-wide collection from all CPUs"),
ac9be8ee 3250 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 3251 "list of cpus to monitor"),
6810fc91 3252 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 3253 "child tasks do not inherit counters"),
994a1f78
JO
3254 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3255 "number of mmap data pages",
3256 perf_evlist__parse_mmap_pages),
ac9be8ee 3257 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 3258 "user to profile"),
ae9ed035
ACM
3259 OPT_CALLBACK(0, "duration", &trace, "float",
3260 "show only events with duration > N.M ms",
3261 trace__set_duration),
1302d88e 3262 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 3263 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
3264 OPT_BOOLEAN('T', "time", &trace.full_time,
3265 "Show full timestamp, not time relative to first start"),
0a6545bd
ACM
3266 OPT_BOOLEAN(0, "failure", &trace.failure_only,
3267 "Show only syscalls that failed"),
fd2eabaf
DA
3268 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3269 "Show only syscall summary with statistics"),
3270 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3271 "Show all syscalls and summary with statistics"),
598d02c5
SF
3272 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3273 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 3274 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 3275 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
3276 OPT_CALLBACK(0, "call-graph", &trace.opts,
3277 "record_mode[,record_size]", record_callchain_help,
3278 &record_parse_callchain_opt),
44621819
ACM
3279 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
3280 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
3281 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
3282 "Set the minimum stack depth when parsing the callchain, "
3283 "anything below the specified depth will be ignored."),
c6d4a494
ACM
3284 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
3285 "Set the maximum stack depth when parsing the callchain, "
3286 "anything beyond the specified depth will be ignored. "
4cb93446 3287 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
591421e1
ACM
3288 OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
3289 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
9d9cad76
KL
3290 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3291 "per thread proc mmap processing timeout in ms"),
9ea42ba4
ACM
3292 OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
3293 trace__parse_cgroups),
e36b7821
AB
3294 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
3295 "ms to wait before starting measurement after program "
3296 "start"),
514f1c67
ACM
3297 OPT_END()
3298 };
/*
 * Both flags start out true and are cleared further down when the
 * corresponding value still holds its UINT_MAX default.
 */
ccd62a89 3299 bool __maybe_unused max_stack_user_set = true;
f3e459d1 3300 bool mmap_pages_user_set = true;
78e890ea 3301 struct perf_evsel *evsel;
6fdd9cb7 3302 const char * const trace_subcommands[] = { "record", NULL };
78e890ea 3303 int err = -1;
32caf0d1 3304 char bf[BUFSIZ];
514f1c67 3305
4d08cb80
ACM
3306 signal(SIGSEGV, sighandler_dump_stack);
3307 signal(SIGFPE, sighandler_dump_stack);
3308
14a052df 3309 trace.evlist = perf_evlist__new();
fd0db102 3310 trace.sctbl = syscalltbl__new();
14a052df 3311
fd0db102 3312 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 3313 pr_err("Not enough memory to run!\n");
ff8f695c 3314 err = -ENOMEM;
14a052df
ACM
3315 goto out;
3316 }
3317
6fdd9cb7
YS
3318 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3319 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 3320
9ea42ba4
ACM
3321 if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
3322 usage_with_options_msg(trace_usage, trace_options,
3323 "cgroup monitoring only available in system-wide mode");
3324 }
3325
78e890ea
ACM
3326 evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
3327 if (IS_ERR(evsel)) {
3328 bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
e0b6d2ef
ACM
3329 pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
3330 goto out;
3331 }
3332
d3d1c4bd
ACM
3333 if (evsel) {
3334 if (perf_evsel__init_augmented_syscall_tp(evsel) ||
3335 perf_evsel__init_augmented_syscall_tp_args(evsel))
3336 goto out;
3337 trace.syscalls.events.augmented = evsel;
3338 }
3339
d7888573
WN
3340 err = bpf__setup_stdout(trace.evlist);
3341 if (err) {
3342 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3343 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
3344 goto out;
3345 }
3346
/*
 * err is 0 at this point (bpf__setup_stdout() succeeded); set it back to -1
 * so that the 'goto out' failure paths below that don't assign err still
 * return an error.
 */
59247e33
ACM
3347 err = -1;
3348
598d02c5
SF
3349 if (trace.trace_pgfaults) {
3350 trace.opts.sample_address = true;
3351 trace.opts.sample_time = true;
3352 }
3353
f3e459d1
ACM
3354 if (trace.opts.mmap_pages == UINT_MAX)
3355 mmap_pages_user_set = false;
3356
05614993 3357 if (trace.max_stack == UINT_MAX) {
029c75e5 3358 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
05614993
ACM
3359 max_stack_user_set = false;
3360 }
3361
3362#ifdef HAVE_DWARF_UNWIND_SUPPORT
75d50117 3363 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
05614993 3364 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
75d50117 3365 }
05614993
ACM
3366#endif
3367
2ddd5c04 3368 if (callchain_param.enabled) {
f3e459d1
ACM
3369 if (!mmap_pages_user_set && geteuid() == 0)
3370 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
3371
566a0885 3372 symbol_conf.use_callchain = true;
f3e459d1 3373 }
566a0885 3374
d32855fa 3375 if (trace.evlist->nr_entries > 0) {
14a052df 3376 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
d32855fa
ACM
3377 if (evlist__set_syscall_tp_fields(trace.evlist)) {
3378 perror("failed to set syscalls:* tracepoint fields");
3379 goto out;
3380 }
3381 }
14a052df 3382
/*
 * 'record' subcommand: pass the remaining arguments to trace__record() and
 * return its result directly, without going through the out/out_close
 * labels.
 */
1e28fe0a
SF
3383 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3384 return trace__record(&trace, argc-1, &argv[1]);
3385
3386 /* summary_only implies summary option, but don't overwrite summary if set */
3387 if (trace.summary_only)
3388 trace.summary = trace.summary_only;
3389
/*
 * Neither syscalls nor page faults were requested and the evlist is empty:
 * default to tracing syscalls.
 */
726f3234
ACM
3390 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3391 trace.evlist->nr_entries == 0 /* Was --events used? */) {
b912885a 3392 trace.trace_syscalls = true;
59247e33
ACM
3393 }
3394
c24ff998
ACM
3395 if (output_name != NULL) {
3396 err = trace__open_output(&trace, output_name);
3397 if (err < 0) {
3398 perror("failed to create output file");
3399 goto out;
3400 }
3401 }
3402
602ad878 3403 err = target__validate(&trace.opts.target);
32caf0d1 3404 if (err) {
602ad878 3405 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3406 fprintf(trace.output, "%s", bf);
3407 goto out_close;
32caf0d1
NK
3408 }
3409
602ad878 3410 err = target__parse_uid(&trace.opts.target);
514f1c67 3411 if (err) {
602ad878 3412 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3413 fprintf(trace.output, "%s", bf);
3414 goto out_close;
514f1c67
ACM
3415 }
3416
/* No workload command left in argv and no target given: trace system wide */
602ad878 3417 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3418 trace.opts.target.system_wide = true;
3419
6810fc91
DA
3420 if (input_name)
3421 err = trace__replay(&trace);
3422 else
3423 err = trace__run(&trace, argc, argv);
1302d88e 3424
/*
 * trace.output is only closed when an output file name was given; otherwise
 * it still holds the stderr default set in the initializer above.
 */
c24ff998
ACM
3425out_close:
3426 if (output_name != NULL)
3427 fclose(trace.output);
3428out:
1302d88e 3429 return err;
514f1c67 3430}