]>
Commit | Line | Data |
---|---|---|
1 | #include <traceevent/event-parse.h> | |
2 | #include "builtin.h" | |
3 | #include "util/color.h" | |
4 | #include "util/debug.h" | |
5 | #include "util/evlist.h" | |
6 | #include "util/machine.h" | |
7 | #include "util/session.h" | |
8 | #include "util/thread.h" | |
9 | #include "util/parse-options.h" | |
10 | #include "util/strlist.h" | |
11 | #include "util/intlist.h" | |
12 | #include "util/thread_map.h" | |
13 | ||
14 | #include <libaudit.h> | |
15 | #include <stdlib.h> | |
16 | #include <sys/mman.h> | |
17 | ||
/*
 * Argument formatter: print @arg into @bf (at most @size bytes) using the
 * "%#lx" hexadecimal format.  Returns the value reported by scnprintf().
 */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, unsigned long arg)
{
	const unsigned long value = arg;

	return scnprintf(bf, size, "%#lx", value);
}

/* Short alias used in the syscall_fmts table. */
#define SCA_HEX syscall_arg__scnprintf_hex
24 | ||
/*
 * Argument formatter for mmap()/mprotect() protection bits.
 *
 * PROT_NONE is printed as "NONE".  Otherwise every known PROT_* bit that is
 * set is printed by name (without the PROT_ prefix), separated by '|', and
 * any bits left over are appended in hexadecimal.
 *
 * Returns the number of characters accumulated from the scnprintf() calls.
 */
static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, unsigned long arg)
{
	int bits = arg;
	int len = 0;

	if (bits == PROT_NONE)
		return scnprintf(bf, size, "NONE");

/* Emit the name of PROT_<n> if set, then clear it from the pending bits. */
#define P_MMAP_PROT(n) \
	do { \
		if (bits & PROT_##n) { \
			len += scnprintf(bf + len, size - len, "%s%s", len ? "|" : "", #n); \
			bits &= ~PROT_##n; \
		} \
	} while (0)

	P_MMAP_PROT(EXEC);
	P_MMAP_PROT(READ);
	P_MMAP_PROT(WRITE);
#ifdef PROT_SEM
	P_MMAP_PROT(SEM);
#endif
	P_MMAP_PROT(GROWSDOWN);
	P_MMAP_PROT(GROWSUP);
#undef P_MMAP_PROT

	/* Whatever is still set has no symbolic name here: dump it raw. */
	if (bits)
		len += scnprintf(bf + len, size - len, "%s%#x", len ? "|" : "", bits);

	return len;
}

/* Short alias used in the syscall_fmts table. */
#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
54 | ||
55 | static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, unsigned long arg) | |
56 | { | |
57 | int printed = 0, flags = arg; | |
58 | ||
59 | #define P_MMAP_FLAG(n) \ | |
60 | if (flags & MAP_##n) { \ | |
61 | printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ | |
62 | flags &= ~MAP_##n; \ | |
63 | } | |
64 | ||
65 | P_MMAP_FLAG(SHARED); | |
66 | P_MMAP_FLAG(PRIVATE); | |
67 | P_MMAP_FLAG(32BIT); | |
68 | P_MMAP_FLAG(ANONYMOUS); | |
69 | P_MMAP_FLAG(DENYWRITE); | |
70 | P_MMAP_FLAG(EXECUTABLE); | |
71 | P_MMAP_FLAG(FILE); | |
72 | P_MMAP_FLAG(FIXED); | |
73 | P_MMAP_FLAG(GROWSDOWN); | |
74 | #ifdef MAP_HUGETLB | |
75 | P_MMAP_FLAG(HUGETLB); | |
76 | #endif | |
77 | P_MMAP_FLAG(LOCKED); | |
78 | P_MMAP_FLAG(NONBLOCK); | |
79 | P_MMAP_FLAG(NORESERVE); | |
80 | P_MMAP_FLAG(POPULATE); | |
81 | P_MMAP_FLAG(STACK); | |
82 | #ifdef MAP_UNINITIALIZED | |
83 | P_MMAP_FLAG(UNINITIALIZED); | |
84 | #endif | |
85 | #undef P_MMAP_FLAG | |
86 | ||
87 | if (flags) | |
88 | printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); | |
89 | ||
90 | return printed; | |
91 | } | |
92 | ||
93 | #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags | |
94 | ||
95 | static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, unsigned long arg) | |
96 | { | |
97 | int behavior = arg; | |
98 | ||
99 | switch (behavior) { | |
100 | #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) | |
101 | P_MADV_BHV(NORMAL); | |
102 | P_MADV_BHV(RANDOM); | |
103 | P_MADV_BHV(SEQUENTIAL); | |
104 | P_MADV_BHV(WILLNEED); | |
105 | P_MADV_BHV(DONTNEED); | |
106 | P_MADV_BHV(REMOVE); | |
107 | P_MADV_BHV(DONTFORK); | |
108 | P_MADV_BHV(DOFORK); | |
109 | P_MADV_BHV(HWPOISON); | |
110 | #ifdef MADV_SOFT_OFFLINE | |
111 | P_MADV_BHV(SOFT_OFFLINE); | |
112 | #endif | |
113 | P_MADV_BHV(MERGEABLE); | |
114 | P_MADV_BHV(UNMERGEABLE); | |
115 | #ifdef MADV_HUGEPAGE | |
116 | P_MADV_BHV(HUGEPAGE); | |
117 | #endif | |
118 | #ifdef MADV_NOHUGEPAGE | |
119 | P_MADV_BHV(NOHUGEPAGE); | |
120 | #endif | |
121 | #ifdef MADV_DONTDUMP | |
122 | P_MADV_BHV(DONTDUMP); | |
123 | #endif | |
124 | #ifdef MADV_DODUMP | |
125 | P_MADV_BHV(DODUMP); | |
126 | #endif | |
127 | #undef P_MADV_PHV | |
128 | default: break; | |
129 | } | |
130 | ||
131 | return scnprintf(bf, size, "%#x", behavior); | |
132 | } | |
133 | ||
134 | #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior | |
135 | ||
136 | static struct syscall_fmt { | |
137 | const char *name; | |
138 | const char *alias; | |
139 | size_t (*arg_scnprintf[6])(char *bf, size_t size, unsigned long arg); | |
140 | bool errmsg; | |
141 | bool timeout; | |
142 | bool hexret; | |
143 | } syscall_fmts[] = { | |
144 | { .name = "access", .errmsg = true, }, | |
145 | { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, | |
146 | { .name = "brk", .hexret = true, | |
147 | .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, | |
148 | { .name = "mmap", .hexret = true, }, | |
149 | { .name = "connect", .errmsg = true, }, | |
150 | { .name = "fstat", .errmsg = true, .alias = "newfstat", }, | |
151 | { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, | |
152 | { .name = "futex", .errmsg = true, }, | |
153 | { .name = "ioctl", .errmsg = true, | |
154 | .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, }, | |
155 | { .name = "lstat", .errmsg = true, .alias = "newlstat", }, | |
156 | { .name = "madvise", .errmsg = true, | |
157 | .arg_scnprintf = { [0] = SCA_HEX, /* start */ | |
158 | [2] = SCA_MADV_BHV, /* behavior */ }, }, | |
159 | { .name = "mmap", .hexret = true, | |
160 | .arg_scnprintf = { [0] = SCA_HEX, /* addr */ | |
161 | [2] = SCA_MMAP_PROT, /* prot */ | |
162 | [3] = SCA_MMAP_FLAGS, /* flags */ }, }, | |
163 | { .name = "mprotect", .errmsg = true, | |
164 | .arg_scnprintf = { [0] = SCA_HEX, /* start */ | |
165 | [2] = SCA_MMAP_PROT, /* prot */ }, }, | |
166 | { .name = "mremap", .hexret = true, | |
167 | .arg_scnprintf = { [0] = SCA_HEX, /* addr */ | |
168 | [4] = SCA_HEX, /* new_addr */ }, }, | |
169 | { .name = "munmap", .errmsg = true, | |
170 | .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, | |
171 | { .name = "open", .errmsg = true, }, | |
172 | { .name = "poll", .errmsg = true, .timeout = true, }, | |
173 | { .name = "ppoll", .errmsg = true, .timeout = true, }, | |
174 | { .name = "pread", .errmsg = true, .alias = "pread64", }, | |
175 | { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, | |
176 | { .name = "read", .errmsg = true, }, | |
177 | { .name = "recvfrom", .errmsg = true, }, | |
178 | { .name = "select", .errmsg = true, .timeout = true, }, | |
179 | { .name = "socket", .errmsg = true, }, | |
180 | { .name = "stat", .errmsg = true, .alias = "newstat", }, | |
181 | { .name = "uname", .errmsg = true, .alias = "newuname", }, | |
182 | }; | |
183 | ||
184 | static int syscall_fmt__cmp(const void *name, const void *fmtp) | |
185 | { | |
186 | const struct syscall_fmt *fmt = fmtp; | |
187 | return strcmp(name, fmt->name); | |
188 | } | |
189 | ||
190 | static struct syscall_fmt *syscall_fmt__find(const char *name) | |
191 | { | |
192 | const int nmemb = ARRAY_SIZE(syscall_fmts); | |
193 | return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); | |
194 | } | |
195 | ||
/*
 * Cached description of one syscall, stored in trace->syscalls.table and
 * indexed by syscall id.  Filled in by trace__read_syscall_info().
 */
struct syscall {
	/* "syscalls:sys_enter_<name>" tracepoint format; NULL if not loaded */
	struct event_format *tp_format;
	/* name returned by audit_syscall_to_name() */
	const char	    *name;
	/* true when the event qualifier excludes this syscall */
	bool		    filtered;
	/* matching syscall_fmts entry, or NULL when there is none */
	struct syscall_fmt  *fmt;
	/* per-argument formatters, allocated by syscall__set_arg_fmts() */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, unsigned long arg);
};
203 | ||
204 | static size_t fprintf_duration(unsigned long t, FILE *fp) | |
205 | { | |
206 | double duration = (double)t / NSEC_PER_MSEC; | |
207 | size_t printed = fprintf(fp, "("); | |
208 | ||
209 | if (duration >= 1.0) | |
210 | printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); | |
211 | else if (duration >= 0.01) | |
212 | printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); | |
213 | else | |
214 | printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); | |
215 | return printed + fprintf(fp, "): "); | |
216 | } | |
217 | ||
218 | struct thread_trace { | |
219 | u64 entry_time; | |
220 | u64 exit_time; | |
221 | bool entry_pending; | |
222 | unsigned long nr_events; | |
223 | char *entry_str; | |
224 | double runtime_ms; | |
225 | }; | |
226 | ||
227 | static struct thread_trace *thread_trace__new(void) | |
228 | { | |
229 | return zalloc(sizeof(struct thread_trace)); | |
230 | } | |
231 | ||
232 | static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) | |
233 | { | |
234 | struct thread_trace *ttrace; | |
235 | ||
236 | if (thread == NULL) | |
237 | goto fail; | |
238 | ||
239 | if (thread->priv == NULL) | |
240 | thread->priv = thread_trace__new(); | |
241 | ||
242 | if (thread->priv == NULL) | |
243 | goto fail; | |
244 | ||
245 | ttrace = thread->priv; | |
246 | ++ttrace->nr_events; | |
247 | ||
248 | return ttrace; | |
249 | fail: | |
250 | color_fprintf(fp, PERF_COLOR_RED, | |
251 | "WARNING: not enough memory, dropping samples!\n"); | |
252 | return NULL; | |
253 | } | |
254 | ||
255 | struct trace { | |
256 | struct perf_tool tool; | |
257 | int audit_machine; | |
258 | struct { | |
259 | int max; | |
260 | struct syscall *table; | |
261 | } syscalls; | |
262 | struct perf_record_opts opts; | |
263 | struct machine host; | |
264 | u64 base_time; | |
265 | FILE *output; | |
266 | unsigned long nr_events; | |
267 | struct strlist *ev_qualifier; | |
268 | bool not_ev_qualifier; | |
269 | struct intlist *tid_list; | |
270 | struct intlist *pid_list; | |
271 | bool sched; | |
272 | bool multiple_threads; | |
273 | double duration_filter; | |
274 | double runtime_ms; | |
275 | }; | |
276 | ||
277 | static bool trace__filter_duration(struct trace *trace, double t) | |
278 | { | |
279 | return t < (trace->duration_filter * NSEC_PER_MSEC); | |
280 | } | |
281 | ||
282 | static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) | |
283 | { | |
284 | double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; | |
285 | ||
286 | return fprintf(fp, "%10.3f ", ts); | |
287 | } | |
288 | ||
289 | static bool done = false; | |
290 | ||
291 | static void sig_handler(int sig __maybe_unused) | |
292 | { | |
293 | done = true; | |
294 | } | |
295 | ||
296 | static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, | |
297 | u64 duration, u64 tstamp, FILE *fp) | |
298 | { | |
299 | size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); | |
300 | printed += fprintf_duration(duration, fp); | |
301 | ||
302 | if (trace->multiple_threads) | |
303 | printed += fprintf(fp, "%d ", thread->tid); | |
304 | ||
305 | return printed; | |
306 | } | |
307 | ||
308 | static int trace__process_event(struct trace *trace, struct machine *machine, | |
309 | union perf_event *event) | |
310 | { | |
311 | int ret = 0; | |
312 | ||
313 | switch (event->header.type) { | |
314 | case PERF_RECORD_LOST: | |
315 | color_fprintf(trace->output, PERF_COLOR_RED, | |
316 | "LOST %" PRIu64 " events!\n", event->lost.lost); | |
317 | ret = machine__process_lost_event(machine, event); | |
318 | default: | |
319 | ret = machine__process_event(machine, event); | |
320 | break; | |
321 | } | |
322 | ||
323 | return ret; | |
324 | } | |
325 | ||
326 | static int trace__tool_process(struct perf_tool *tool, | |
327 | union perf_event *event, | |
328 | struct perf_sample *sample __maybe_unused, | |
329 | struct machine *machine) | |
330 | { | |
331 | struct trace *trace = container_of(tool, struct trace, tool); | |
332 | return trace__process_event(trace, machine, event); | |
333 | } | |
334 | ||
335 | static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) | |
336 | { | |
337 | int err = symbol__init(); | |
338 | ||
339 | if (err) | |
340 | return err; | |
341 | ||
342 | machine__init(&trace->host, "", HOST_KERNEL_ID); | |
343 | machine__create_kernel_maps(&trace->host); | |
344 | ||
345 | if (perf_target__has_task(&trace->opts.target)) { | |
346 | err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads, | |
347 | trace__tool_process, | |
348 | &trace->host); | |
349 | } else { | |
350 | err = perf_event__synthesize_threads(&trace->tool, trace__tool_process, | |
351 | &trace->host); | |
352 | } | |
353 | ||
354 | if (err) | |
355 | symbol__exit(); | |
356 | ||
357 | return err; | |
358 | } | |
359 | ||
360 | static int syscall__set_arg_fmts(struct syscall *sc) | |
361 | { | |
362 | struct format_field *field; | |
363 | int idx = 0; | |
364 | ||
365 | sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *)); | |
366 | if (sc->arg_scnprintf == NULL) | |
367 | return -1; | |
368 | ||
369 | for (field = sc->tp_format->format.fields->next; field; field = field->next) { | |
370 | if (sc->fmt && sc->fmt->arg_scnprintf[idx]) | |
371 | sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; | |
372 | else if (field->flags & FIELD_IS_POINTER) | |
373 | sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; | |
374 | ++idx; | |
375 | } | |
376 | ||
377 | return 0; | |
378 | } | |
379 | ||
380 | static int trace__read_syscall_info(struct trace *trace, int id) | |
381 | { | |
382 | char tp_name[128]; | |
383 | struct syscall *sc; | |
384 | const char *name = audit_syscall_to_name(id, trace->audit_machine); | |
385 | ||
386 | if (name == NULL) | |
387 | return -1; | |
388 | ||
389 | if (id > trace->syscalls.max) { | |
390 | struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); | |
391 | ||
392 | if (nsyscalls == NULL) | |
393 | return -1; | |
394 | ||
395 | if (trace->syscalls.max != -1) { | |
396 | memset(nsyscalls + trace->syscalls.max + 1, 0, | |
397 | (id - trace->syscalls.max) * sizeof(*sc)); | |
398 | } else { | |
399 | memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); | |
400 | } | |
401 | ||
402 | trace->syscalls.table = nsyscalls; | |
403 | trace->syscalls.max = id; | |
404 | } | |
405 | ||
406 | sc = trace->syscalls.table + id; | |
407 | sc->name = name; | |
408 | ||
409 | if (trace->ev_qualifier) { | |
410 | bool in = strlist__find(trace->ev_qualifier, name) != NULL; | |
411 | ||
412 | if (!(in ^ trace->not_ev_qualifier)) { | |
413 | sc->filtered = true; | |
414 | /* | |
415 | * No need to do read tracepoint information since this will be | |
416 | * filtered out. | |
417 | */ | |
418 | return 0; | |
419 | } | |
420 | } | |
421 | ||
422 | sc->fmt = syscall_fmt__find(sc->name); | |
423 | ||
424 | snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); | |
425 | sc->tp_format = event_format__new("syscalls", tp_name); | |
426 | ||
427 | if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { | |
428 | snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); | |
429 | sc->tp_format = event_format__new("syscalls", tp_name); | |
430 | } | |
431 | ||
432 | if (sc->tp_format == NULL) | |
433 | return -1; | |
434 | ||
435 | return syscall__set_arg_fmts(sc); | |
436 | } | |
437 | ||
438 | static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, | |
439 | unsigned long *args) | |
440 | { | |
441 | int i = 0; | |
442 | size_t printed = 0; | |
443 | ||
444 | if (sc->tp_format != NULL) { | |
445 | struct format_field *field; | |
446 | ||
447 | for (field = sc->tp_format->format.fields->next; field; field = field->next) { | |
448 | printed += scnprintf(bf + printed, size - printed, | |
449 | "%s%s: ", printed ? ", " : "", field->name); | |
450 | ||
451 | if (sc->arg_scnprintf && sc->arg_scnprintf[i]) | |
452 | printed += sc->arg_scnprintf[i](bf + printed, size - printed, args[i]); | |
453 | else | |
454 | printed += scnprintf(bf + printed, size - printed, | |
455 | "%ld", args[i]); | |
456 | ++i; | |
457 | } | |
458 | } else { | |
459 | while (i < 6) { | |
460 | printed += scnprintf(bf + printed, size - printed, | |
461 | "%sarg%d: %ld", | |
462 | printed ? ", " : "", i, args[i]); | |
463 | ++i; | |
464 | } | |
465 | } | |
466 | ||
467 | return printed; | |
468 | } | |
469 | ||
/* Signature of the per-tracepoint sample handlers stored in evsel->handler.func. */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  struct perf_sample *sample);
472 | ||
473 | static struct syscall *trace__syscall_info(struct trace *trace, | |
474 | struct perf_evsel *evsel, | |
475 | struct perf_sample *sample) | |
476 | { | |
477 | int id = perf_evsel__intval(evsel, sample, "id"); | |
478 | ||
479 | if (id < 0) { | |
480 | ||
481 | /* | |
482 | * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried | |
483 | * before that, leaving at a higher verbosity level till that is | |
484 | * explained. Reproduced with plain ftrace with: | |
485 | * | |
486 | * echo 1 > /t/events/raw_syscalls/sys_exit/enable | |
487 | * grep "NR -1 " /t/trace_pipe | |
488 | * | |
489 | * After generating some load on the machine. | |
490 | */ | |
491 | if (verbose > 1) { | |
492 | static u64 n; | |
493 | fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", | |
494 | id, perf_evsel__name(evsel), ++n); | |
495 | } | |
496 | return NULL; | |
497 | } | |
498 | ||
499 | if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && | |
500 | trace__read_syscall_info(trace, id)) | |
501 | goto out_cant_read; | |
502 | ||
503 | if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) | |
504 | goto out_cant_read; | |
505 | ||
506 | return &trace->syscalls.table[id]; | |
507 | ||
508 | out_cant_read: | |
509 | if (verbose) { | |
510 | fprintf(trace->output, "Problems reading syscall %d", id); | |
511 | if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) | |
512 | fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); | |
513 | fputs(" information\n", trace->output); | |
514 | } | |
515 | return NULL; | |
516 | } | |
517 | ||
518 | static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, | |
519 | struct perf_sample *sample) | |
520 | { | |
521 | char *msg; | |
522 | void *args; | |
523 | size_t printed = 0; | |
524 | struct thread *thread; | |
525 | struct syscall *sc = trace__syscall_info(trace, evsel, sample); | |
526 | struct thread_trace *ttrace; | |
527 | ||
528 | if (sc == NULL) | |
529 | return -1; | |
530 | ||
531 | if (sc->filtered) | |
532 | return 0; | |
533 | ||
534 | thread = machine__findnew_thread(&trace->host, sample->pid, | |
535 | sample->tid); | |
536 | ttrace = thread__trace(thread, trace->output); | |
537 | if (ttrace == NULL) | |
538 | return -1; | |
539 | ||
540 | args = perf_evsel__rawptr(evsel, sample, "args"); | |
541 | if (args == NULL) { | |
542 | fprintf(trace->output, "Problems reading syscall arguments\n"); | |
543 | return -1; | |
544 | } | |
545 | ||
546 | ttrace = thread->priv; | |
547 | ||
548 | if (ttrace->entry_str == NULL) { | |
549 | ttrace->entry_str = malloc(1024); | |
550 | if (!ttrace->entry_str) | |
551 | return -1; | |
552 | } | |
553 | ||
554 | ttrace->entry_time = sample->time; | |
555 | msg = ttrace->entry_str; | |
556 | printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); | |
557 | ||
558 | printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args); | |
559 | ||
560 | if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) { | |
561 | if (!trace->duration_filter) { | |
562 | trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); | |
563 | fprintf(trace->output, "%-70s\n", ttrace->entry_str); | |
564 | } | |
565 | } else | |
566 | ttrace->entry_pending = true; | |
567 | ||
568 | return 0; | |
569 | } | |
570 | ||
571 | static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, | |
572 | struct perf_sample *sample) | |
573 | { | |
574 | int ret; | |
575 | u64 duration = 0; | |
576 | struct thread *thread; | |
577 | struct syscall *sc = trace__syscall_info(trace, evsel, sample); | |
578 | struct thread_trace *ttrace; | |
579 | ||
580 | if (sc == NULL) | |
581 | return -1; | |
582 | ||
583 | if (sc->filtered) | |
584 | return 0; | |
585 | ||
586 | thread = machine__findnew_thread(&trace->host, sample->pid, | |
587 | sample->tid); | |
588 | ttrace = thread__trace(thread, trace->output); | |
589 | if (ttrace == NULL) | |
590 | return -1; | |
591 | ||
592 | ret = perf_evsel__intval(evsel, sample, "ret"); | |
593 | ||
594 | ttrace = thread->priv; | |
595 | ||
596 | ttrace->exit_time = sample->time; | |
597 | ||
598 | if (ttrace->entry_time) { | |
599 | duration = sample->time - ttrace->entry_time; | |
600 | if (trace__filter_duration(trace, duration)) | |
601 | goto out; | |
602 | } else if (trace->duration_filter) | |
603 | goto out; | |
604 | ||
605 | trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output); | |
606 | ||
607 | if (ttrace->entry_pending) { | |
608 | fprintf(trace->output, "%-70s", ttrace->entry_str); | |
609 | } else { | |
610 | fprintf(trace->output, " ... ["); | |
611 | color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued"); | |
612 | fprintf(trace->output, "]: %s()", sc->name); | |
613 | } | |
614 | ||
615 | if (sc->fmt == NULL) { | |
616 | signed_print: | |
617 | fprintf(trace->output, ") = %d", ret); | |
618 | } else if (ret < 0 && sc->fmt->errmsg) { | |
619 | char bf[256]; | |
620 | const char *emsg = strerror_r(-ret, bf, sizeof(bf)), | |
621 | *e = audit_errno_to_name(-ret); | |
622 | ||
623 | fprintf(trace->output, ") = -1 %s %s", e, emsg); | |
624 | } else if (ret == 0 && sc->fmt->timeout) | |
625 | fprintf(trace->output, ") = 0 Timeout"); | |
626 | else if (sc->fmt->hexret) | |
627 | fprintf(trace->output, ") = %#x", ret); | |
628 | else | |
629 | goto signed_print; | |
630 | ||
631 | fputc('\n', trace->output); | |
632 | out: | |
633 | ttrace->entry_pending = false; | |
634 | ||
635 | return 0; | |
636 | } | |
637 | ||
638 | static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, | |
639 | struct perf_sample *sample) | |
640 | { | |
641 | u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); | |
642 | double runtime_ms = (double)runtime / NSEC_PER_MSEC; | |
643 | struct thread *thread = machine__findnew_thread(&trace->host, | |
644 | sample->pid, | |
645 | sample->tid); | |
646 | struct thread_trace *ttrace = thread__trace(thread, trace->output); | |
647 | ||
648 | if (ttrace == NULL) | |
649 | goto out_dump; | |
650 | ||
651 | ttrace->runtime_ms += runtime_ms; | |
652 | trace->runtime_ms += runtime_ms; | |
653 | return 0; | |
654 | ||
655 | out_dump: | |
656 | fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", | |
657 | evsel->name, | |
658 | perf_evsel__strval(evsel, sample, "comm"), | |
659 | (pid_t)perf_evsel__intval(evsel, sample, "pid"), | |
660 | runtime, | |
661 | perf_evsel__intval(evsel, sample, "vruntime")); | |
662 | return 0; | |
663 | } | |
664 | ||
665 | static bool skip_sample(struct trace *trace, struct perf_sample *sample) | |
666 | { | |
667 | if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || | |
668 | (trace->tid_list && intlist__find(trace->tid_list, sample->tid))) | |
669 | return false; | |
670 | ||
671 | if (trace->pid_list || trace->tid_list) | |
672 | return true; | |
673 | ||
674 | return false; | |
675 | } | |
676 | ||
677 | static int trace__process_sample(struct perf_tool *tool, | |
678 | union perf_event *event __maybe_unused, | |
679 | struct perf_sample *sample, | |
680 | struct perf_evsel *evsel, | |
681 | struct machine *machine __maybe_unused) | |
682 | { | |
683 | struct trace *trace = container_of(tool, struct trace, tool); | |
684 | int err = 0; | |
685 | ||
686 | tracepoint_handler handler = evsel->handler.func; | |
687 | ||
688 | if (skip_sample(trace, sample)) | |
689 | return 0; | |
690 | ||
691 | if (trace->base_time == 0) | |
692 | trace->base_time = sample->time; | |
693 | ||
694 | if (handler) | |
695 | handler(trace, evsel, sample); | |
696 | ||
697 | return err; | |
698 | } | |
699 | ||
700 | static bool | |
701 | perf_session__has_tp(struct perf_session *session, const char *name) | |
702 | { | |
703 | struct perf_evsel *evsel; | |
704 | ||
705 | evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name); | |
706 | ||
707 | return evsel != NULL; | |
708 | } | |
709 | ||
710 | static int parse_target_str(struct trace *trace) | |
711 | { | |
712 | if (trace->opts.target.pid) { | |
713 | trace->pid_list = intlist__new(trace->opts.target.pid); | |
714 | if (trace->pid_list == NULL) { | |
715 | pr_err("Error parsing process id string\n"); | |
716 | return -EINVAL; | |
717 | } | |
718 | } | |
719 | ||
720 | if (trace->opts.target.tid) { | |
721 | trace->tid_list = intlist__new(trace->opts.target.tid); | |
722 | if (trace->tid_list == NULL) { | |
723 | pr_err("Error parsing thread id string\n"); | |
724 | return -EINVAL; | |
725 | } | |
726 | } | |
727 | ||
728 | return 0; | |
729 | } | |
730 | ||
731 | static int trace__run(struct trace *trace, int argc, const char **argv) | |
732 | { | |
733 | struct perf_evlist *evlist = perf_evlist__new(); | |
734 | struct perf_evsel *evsel; | |
735 | int err = -1, i; | |
736 | unsigned long before; | |
737 | const bool forks = argc > 0; | |
738 | ||
739 | if (evlist == NULL) { | |
740 | fprintf(trace->output, "Not enough memory to run!\n"); | |
741 | goto out; | |
742 | } | |
743 | ||
744 | if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) || | |
745 | perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) { | |
746 | fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n"); | |
747 | goto out_delete_evlist; | |
748 | } | |
749 | ||
750 | if (trace->sched && | |
751 | perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", | |
752 | trace__sched_stat_runtime)) { | |
753 | fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n"); | |
754 | goto out_delete_evlist; | |
755 | } | |
756 | ||
757 | err = perf_evlist__create_maps(evlist, &trace->opts.target); | |
758 | if (err < 0) { | |
759 | fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); | |
760 | goto out_delete_evlist; | |
761 | } | |
762 | ||
763 | err = trace__symbols_init(trace, evlist); | |
764 | if (err < 0) { | |
765 | fprintf(trace->output, "Problems initializing symbol libraries!\n"); | |
766 | goto out_delete_maps; | |
767 | } | |
768 | ||
769 | perf_evlist__config(evlist, &trace->opts); | |
770 | ||
771 | signal(SIGCHLD, sig_handler); | |
772 | signal(SIGINT, sig_handler); | |
773 | ||
774 | if (forks) { | |
775 | err = perf_evlist__prepare_workload(evlist, &trace->opts.target, | |
776 | argv, false, false); | |
777 | if (err < 0) { | |
778 | fprintf(trace->output, "Couldn't run the workload!\n"); | |
779 | goto out_delete_maps; | |
780 | } | |
781 | } | |
782 | ||
783 | err = perf_evlist__open(evlist); | |
784 | if (err < 0) { | |
785 | fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno)); | |
786 | goto out_delete_maps; | |
787 | } | |
788 | ||
789 | err = perf_evlist__mmap(evlist, UINT_MAX, false); | |
790 | if (err < 0) { | |
791 | fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno)); | |
792 | goto out_close_evlist; | |
793 | } | |
794 | ||
795 | perf_evlist__enable(evlist); | |
796 | ||
797 | if (forks) | |
798 | perf_evlist__start_workload(evlist); | |
799 | ||
800 | trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1; | |
801 | again: | |
802 | before = trace->nr_events; | |
803 | ||
804 | for (i = 0; i < evlist->nr_mmaps; i++) { | |
805 | union perf_event *event; | |
806 | ||
807 | while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { | |
808 | const u32 type = event->header.type; | |
809 | tracepoint_handler handler; | |
810 | struct perf_sample sample; | |
811 | ||
812 | ++trace->nr_events; | |
813 | ||
814 | err = perf_evlist__parse_sample(evlist, event, &sample); | |
815 | if (err) { | |
816 | fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); | |
817 | continue; | |
818 | } | |
819 | ||
820 | if (trace->base_time == 0) | |
821 | trace->base_time = sample.time; | |
822 | ||
823 | if (type != PERF_RECORD_SAMPLE) { | |
824 | trace__process_event(trace, &trace->host, event); | |
825 | continue; | |
826 | } | |
827 | ||
828 | evsel = perf_evlist__id2evsel(evlist, sample.id); | |
829 | if (evsel == NULL) { | |
830 | fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); | |
831 | continue; | |
832 | } | |
833 | ||
834 | if (sample.raw_data == NULL) { | |
835 | fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", | |
836 | perf_evsel__name(evsel), sample.tid, | |
837 | sample.cpu, sample.raw_size); | |
838 | continue; | |
839 | } | |
840 | ||
841 | handler = evsel->handler.func; | |
842 | handler(trace, evsel, &sample); | |
843 | } | |
844 | } | |
845 | ||
846 | if (trace->nr_events == before) { | |
847 | if (done) | |
848 | goto out_unmap_evlist; | |
849 | ||
850 | poll(evlist->pollfd, evlist->nr_fds, -1); | |
851 | } | |
852 | ||
853 | if (done) | |
854 | perf_evlist__disable(evlist); | |
855 | ||
856 | goto again; | |
857 | ||
858 | out_unmap_evlist: | |
859 | perf_evlist__munmap(evlist); | |
860 | out_close_evlist: | |
861 | perf_evlist__close(evlist); | |
862 | out_delete_maps: | |
863 | perf_evlist__delete_maps(evlist); | |
864 | out_delete_evlist: | |
865 | perf_evlist__delete(evlist); | |
866 | out: | |
867 | return err; | |
868 | } | |
869 | ||
870 | static int trace__replay(struct trace *trace) | |
871 | { | |
872 | const struct perf_evsel_str_handler handlers[] = { | |
873 | { "raw_syscalls:sys_enter", trace__sys_enter, }, | |
874 | { "raw_syscalls:sys_exit", trace__sys_exit, }, | |
875 | }; | |
876 | ||
877 | struct perf_session *session; | |
878 | int err = -1; | |
879 | ||
880 | trace->tool.sample = trace__process_sample; | |
881 | trace->tool.mmap = perf_event__process_mmap; | |
882 | trace->tool.comm = perf_event__process_comm; | |
883 | trace->tool.exit = perf_event__process_exit; | |
884 | trace->tool.fork = perf_event__process_fork; | |
885 | trace->tool.attr = perf_event__process_attr; | |
886 | trace->tool.tracing_data = perf_event__process_tracing_data; | |
887 | trace->tool.build_id = perf_event__process_build_id; | |
888 | ||
889 | trace->tool.ordered_samples = true; | |
890 | trace->tool.ordering_requires_timestamps = true; | |
891 | ||
892 | /* add tid to output */ | |
893 | trace->multiple_threads = true; | |
894 | ||
895 | if (symbol__init() < 0) | |
896 | return -1; | |
897 | ||
898 | session = perf_session__new(input_name, O_RDONLY, 0, false, | |
899 | &trace->tool); | |
900 | if (session == NULL) | |
901 | return -ENOMEM; | |
902 | ||
903 | err = perf_session__set_tracepoints_handlers(session, handlers); | |
904 | if (err) | |
905 | goto out; | |
906 | ||
907 | if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) { | |
908 | pr_err("Data file does not have raw_syscalls:sys_enter events\n"); | |
909 | goto out; | |
910 | } | |
911 | ||
912 | if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) { | |
913 | pr_err("Data file does not have raw_syscalls:sys_exit events\n"); | |
914 | goto out; | |
915 | } | |
916 | ||
917 | err = parse_target_str(trace); | |
918 | if (err != 0) | |
919 | goto out; | |
920 | ||
921 | setup_pager(); | |
922 | ||
923 | err = perf_session__process_events(session, &trace->tool); | |
924 | if (err) | |
925 | pr_err("Failed to process events, error %d", err); | |
926 | ||
927 | out: | |
928 | perf_session__delete(session); | |
929 | ||
930 | return err; | |
931 | } | |
932 | ||
/*
 * Print the banner and column headings of the per-thread summary to @fp.
 * Returns the accumulated fprintf() results.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	static const char * const lines[] = {
		"\n _____________________________________________________________________\n",
		" __) Summary of events (__\n\n",
		" [ task - pid ] [ events ] [ ratio ] [ runtime ]\n",
		" _____________________________________________________________________\n\n",
	};
	size_t printed = 0;
	size_t i;

	for (i = 0; i < sizeof(lines) / sizeof(lines[0]); i++)
		printed += fprintf(fp, "%s", lines[i]);

	return printed;
}
944 | ||
945 | static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) | |
946 | { | |
947 | size_t printed = trace__fprintf_threads_header(fp); | |
948 | struct rb_node *nd; | |
949 | ||
950 | for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) { | |
951 | struct thread *thread = rb_entry(nd, struct thread, rb_node); | |
952 | struct thread_trace *ttrace = thread->priv; | |
953 | const char *color; | |
954 | double ratio; | |
955 | ||
956 | if (ttrace == NULL) | |
957 | continue; | |
958 | ||
959 | ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; | |
960 | ||
961 | color = PERF_COLOR_NORMAL; | |
962 | if (ratio > 50.0) | |
963 | color = PERF_COLOR_RED; | |
964 | else if (ratio > 25.0) | |
965 | color = PERF_COLOR_GREEN; | |
966 | else if (ratio > 5.0) | |
967 | color = PERF_COLOR_YELLOW; | |
968 | ||
969 | printed += color_fprintf(fp, color, "%20s", thread->comm); | |
970 | printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events); | |
971 | printed += color_fprintf(fp, color, "%5.1f%%", ratio); | |
972 | printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms); | |
973 | } | |
974 | ||
975 | return printed; | |
976 | } | |
977 | ||
978 | static int trace__set_duration(const struct option *opt, const char *str, | |
979 | int unset __maybe_unused) | |
980 | { | |
981 | struct trace *trace = opt->value; | |
982 | ||
983 | trace->duration_filter = atof(str); | |
984 | return 0; | |
985 | } | |
986 | ||
987 | static int trace__open_output(struct trace *trace, const char *filename) | |
988 | { | |
989 | struct stat st; | |
990 | ||
991 | if (!stat(filename, &st) && st.st_size) { | |
992 | char oldname[PATH_MAX]; | |
993 | ||
994 | scnprintf(oldname, sizeof(oldname), "%s.old", filename); | |
995 | unlink(oldname); | |
996 | rename(filename, oldname); | |
997 | } | |
998 | ||
999 | trace->output = fopen(filename, "w"); | |
1000 | ||
1001 | return trace->output == NULL ? -errno : 0; | |
1002 | } | |
1003 | ||
1004 | int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) | |
1005 | { | |
1006 | const char * const trace_usage[] = { | |
1007 | "perf trace [<options>] [<command>]", | |
1008 | "perf trace [<options>] -- <command> [<options>]", | |
1009 | NULL | |
1010 | }; | |
1011 | struct trace trace = { | |
1012 | .audit_machine = audit_detect_machine(), | |
1013 | .syscalls = { | |
1014 | . max = -1, | |
1015 | }, | |
1016 | .opts = { | |
1017 | .target = { | |
1018 | .uid = UINT_MAX, | |
1019 | .uses_mmap = true, | |
1020 | }, | |
1021 | .user_freq = UINT_MAX, | |
1022 | .user_interval = ULLONG_MAX, | |
1023 | .no_delay = true, | |
1024 | .mmap_pages = 1024, | |
1025 | }, | |
1026 | .output = stdout, | |
1027 | }; | |
1028 | const char *output_name = NULL; | |
1029 | const char *ev_qualifier_str = NULL; | |
1030 | const struct option trace_options[] = { | |
1031 | OPT_STRING('e', "expr", &ev_qualifier_str, "expr", | |
1032 | "list of events to trace"), | |
1033 | OPT_STRING('o', "output", &output_name, "file", "output file name"), | |
1034 | OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), | |
1035 | OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", | |
1036 | "trace events on existing process id"), | |
1037 | OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", | |
1038 | "trace events on existing thread id"), | |
1039 | OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, | |
1040 | "system-wide collection from all CPUs"), | |
1041 | OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", | |
1042 | "list of cpus to monitor"), | |
1043 | OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, | |
1044 | "child tasks do not inherit counters"), | |
1045 | OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages, | |
1046 | "number of mmap data pages"), | |
1047 | OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", | |
1048 | "user to profile"), | |
1049 | OPT_CALLBACK(0, "duration", &trace, "float", | |
1050 | "show only events with duration > N.M ms", | |
1051 | trace__set_duration), | |
1052 | OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), | |
1053 | OPT_INCR('v', "verbose", &verbose, "be more verbose"), | |
1054 | OPT_END() | |
1055 | }; | |
1056 | int err; | |
1057 | char bf[BUFSIZ]; | |
1058 | ||
1059 | argc = parse_options(argc, argv, trace_options, trace_usage, 0); | |
1060 | ||
1061 | if (output_name != NULL) { | |
1062 | err = trace__open_output(&trace, output_name); | |
1063 | if (err < 0) { | |
1064 | perror("failed to create output file"); | |
1065 | goto out; | |
1066 | } | |
1067 | } | |
1068 | ||
1069 | if (ev_qualifier_str != NULL) { | |
1070 | const char *s = ev_qualifier_str; | |
1071 | ||
1072 | trace.not_ev_qualifier = *s == '!'; | |
1073 | if (trace.not_ev_qualifier) | |
1074 | ++s; | |
1075 | trace.ev_qualifier = strlist__new(true, s); | |
1076 | if (trace.ev_qualifier == NULL) { | |
1077 | fputs("Not enough memory to parse event qualifier", | |
1078 | trace.output); | |
1079 | err = -ENOMEM; | |
1080 | goto out_close; | |
1081 | } | |
1082 | } | |
1083 | ||
1084 | err = perf_target__validate(&trace.opts.target); | |
1085 | if (err) { | |
1086 | perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); | |
1087 | fprintf(trace.output, "%s", bf); | |
1088 | goto out_close; | |
1089 | } | |
1090 | ||
1091 | err = perf_target__parse_uid(&trace.opts.target); | |
1092 | if (err) { | |
1093 | perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); | |
1094 | fprintf(trace.output, "%s", bf); | |
1095 | goto out_close; | |
1096 | } | |
1097 | ||
1098 | if (!argc && perf_target__none(&trace.opts.target)) | |
1099 | trace.opts.target.system_wide = true; | |
1100 | ||
1101 | if (input_name) | |
1102 | err = trace__replay(&trace); | |
1103 | else | |
1104 | err = trace__run(&trace, argc, argv); | |
1105 | ||
1106 | if (trace.sched && !err) | |
1107 | trace__fprintf_thread_summary(&trace, trace.output); | |
1108 | ||
1109 | out_close: | |
1110 | if (output_name != NULL) | |
1111 | fclose(trace.output); | |
1112 | out: | |
1113 | return err; | |
1114 | } |