/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <linux/time64.h>

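/*
 * Parsed --switch-output configuration: output rotation can be requested
 * via SIGUSR2 ("signal"), by a size threshold, or by a time interval.
 */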
struct switch_output {
        bool enabled;
        bool signal;
        unsigned long size;
        unsigned long time;
        const char *str;
        bool set;
};

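/*
 * Per-invocation state of 'perf record': the tool callbacks, the parsed
 * options, the output file and session, and byte/sample accounting.
 */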
struct record {
        struct perf_tool tool;
        struct record_opts opts;
        u64 bytes_written;
        struct perf_data_file file;
        struct auxtrace_record *itr;
        struct perf_evlist *evlist;
        struct perf_session *session;
        const char *progname;
        int realtime_prio;
        bool no_buildid;
        bool no_buildid_set;
        bool no_buildid_cache;
        bool no_buildid_cache_set;
        bool buildid_all;
        bool timestamp_filename;
        struct switch_output switch_output;
        unsigned long long samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static bool switch_output_signal(struct record *rec)
{
        return rec->switch_output.signal &&
               trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
        return rec->switch_output.size &&
               trigger_is_ready(&switch_output_trigger) &&
               (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
        return rec->switch_output.time &&
               trigger_is_ready(&switch_output_trigger);
}

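/*
 * Append @size bytes at @bf to the perf.data file, account the bytes
 * written, and fire the switch-output trigger once the size threshold
 * is crossed.
 */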
static int record__write(struct record *rec, void *bf, size_t size)
{
        if (perf_data_file__write(rec->session->file, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }

        rec->bytes_written += size;

        if (switch_output_size(rec))
                trigger_hit(&switch_output_trigger);

        return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        struct record *rec = container_of(tool, struct record, tool);
        return record__write(rec, event, event->header.size);
}

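/*
 * Find the data range in a backward (overwritable) ring buffer: walk
 * event headers forward from @head until a zero-sized header or a full
 * wrap of the buffer is hit, yielding [*start, *end) in ring-buffer
 * offsets.
 */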
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
        struct perf_event_header *pheader;
        u64 evt_head = head;
        int size = mask + 1;

        pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
        pheader = (struct perf_event_header *)(buf + (head & mask));
        *start = head;
        while (true) {
                if (evt_head - head >= (unsigned int)size) {
                        pr_debug("Finished reading backward ring buffer: rewind\n");
                        if (evt_head - head > (unsigned int)size)
                                evt_head -= pheader->size;
                        *end = evt_head;
                        return 0;
                }

                pheader = (struct perf_event_header *)(buf + (evt_head & mask));

                if (pheader->size == 0) {
                        pr_debug("Finished reading backward ring buffer: get start\n");
                        *end = evt_head;
                        return 0;
                }

                evt_head += pheader->size;
                pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
        }
        WARN_ONCE(1, "Shouldn't get here\n");
        return -1;
}

static int
rb_find_range(void *data, int mask, u64 head, u64 old,
              u64 *start, u64 *end, bool backward)
{
        if (!backward) {
                *start = old;
                *end = head;
                return 0;
        }

        return backward_rb_find_range(data, mask, head, start, end);
}

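/*
 * Drain one mmap'ed ring buffer into the output file.  A range that
 * wraps around the end of the buffer is written in two chunks.  If the
 * reader fell a full buffer behind, the data is dropped (with a
 * one-time warning) rather than written out corrupted.
 */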
static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
                  bool overwrite, bool backward)
{
        u64 head = perf_mmap__read_head(md);
        u64 old = md->prev;
        u64 end = head, start = old;
        unsigned char *data = md->base + page_size;
        unsigned long size;
        void *buf;
        int rc = 0;

        if (rb_find_range(data, md->mask, head,
                          old, &start, &end, backward))
                return -1;

        if (start == end)
                return 0;

        rec->samples++;

        size = end - start;
        if (size > (unsigned long)(md->mask) + 1) {
                WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

                md->prev = head;
                perf_mmap__consume(md, overwrite || backward);
                return 0;
        }

        if ((start & md->mask) + size != (end & md->mask)) {
                buf = &data[start & md->mask];
                size = md->mask + 1 - (start & md->mask);
                start += size;

                if (record__write(rec, buf, size) < 0) {
                        rc = -1;
                        goto out;
                }
        }

        buf = &data[start & md->mask];
        size = end - start;
        start += size;

        if (record__write(rec, buf, size) < 0) {
                rc = -1;
                goto out;
        }

        md->prev = head;
        perf_mmap__consume(md, overwrite || backward);
out:
        return rc;
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;
        else
                signr = sig;

        done = 1;
}

static void sigsegv_handler(int sig)
{
        perf_hooks__recover();
        sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
        if (signr == -1)
                return;

        signal(signr, SIG_DFL);
        raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

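/*
 * Write an AUX area trace event followed by its payload.  The payload
 * may arrive in two chunks when it wraps the AUX buffer, and it is
 * padded out to an 8-byte boundary, matching the size already accounted
 * in event->auxtrace.size by __auxtrace_mmap__read().
 */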
static int record__process_auxtrace(struct perf_tool *tool,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
{
        struct record *rec = container_of(tool, struct record, tool);
        struct perf_data_file *file = &rec->file;
        size_t padding;
        u8 pad[8] = {0};

        if (!perf_data_file__is_pipe(file)) {
                off_t file_offset;
                int fd = perf_data_file__fd(file);
                int err;

                file_offset = lseek(fd, 0, SEEK_CUR);
                if (file_offset == -1)
                        return -1;
                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
                                                     event, file_offset);
                if (err)
                        return err;
        }

        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
        padding = (len1 + len2) & 7;
        if (padding)
                padding = 8 - padding;

        record__write(rec, event, event->header.size);
        record__write(rec, data1, len1);
        if (len2)
                record__write(rec, data2, len2);
        record__write(rec, &pad, padding);

        return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
                                      struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
                                  record__process_auxtrace);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
                                               struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
                                           record__process_auxtrace,
                                           rec->opts.auxtrace_snapshot_size);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
        int i;
        int rc = 0;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm =
                                &rec->evlist->mmap[i].auxtrace_mmap;

                if (!mm->base)
                        continue;

                if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }
out:
        return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
        pr_debug("Recording AUX area tracing snapshot\n");
        if (record__auxtrace_read_snapshot_all(rec) < 0) {
                trigger_error(&auxtrace_snapshot_trigger);
        } else {
                if (auxtrace_record__snapshot_finish(rec->itr))
                        trigger_error(&auxtrace_snapshot_trigger);
                else
                        trigger_ready(&auxtrace_snapshot_trigger);
        }
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
                               struct auxtrace_mmap *mm __maybe_unused)
{
        return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
        return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
                               struct perf_evlist *evlist)
{
        struct record_opts *opts = &rec->opts;
        char msg[512];

        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
                                 opts->auxtrace_mmap_pages,
                                 opts->auxtrace_snapshot_mode) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %u,%u)\n",
                               opts->mmap_pages, opts->auxtrace_mmap_pages);
                        return -errno;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
                               str_error_r(errno, msg, sizeof(msg)));
                        if (errno)
                                return -errno;
                        else
                                return -EINVAL;
                }
        }
        return 0;
}

static int record__mmap(struct record *rec)
{
        return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
        char msg[BUFSIZ];
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct record_opts *opts = &rec->opts;
        struct perf_evsel_config_term *err_term;
        int rc = 0;

        perf_evlist__config(evlist, opts, &callchain_param);

        evlist__for_each_entry(evlist, pos) {
try_again:
                if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
                                if (verbose > 0)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }

                        rc = -errno;
                        perf_evsel__open_strerror(pos, &opts->target,
                                                  errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);
                        goto out;
                }
        }

        if (perf_evlist__apply_filters(evlist, &pos)) {
                pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
                       pos->filter, perf_evsel__name(pos), errno,
                       str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
                pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
                       err_term->val.drv_cfg, perf_evsel__name(pos), errno,
                       str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        rc = record__mmap(rec);
        if (rc)
                goto out;

        session->evlist = evlist;
        perf_session__set_id_hdr_size(session);
out:
        return rc;
}

static int process_sample_event(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct perf_evsel *evsel,
                                struct machine *machine)
{
        struct record *rec = container_of(tool, struct record, tool);

        rec->samples++;

        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
        struct perf_data_file *file = &rec->file;
        struct perf_session *session = rec->session;

        if (file->size == 0)
                return 0;

        /*
         * During this process, it'll load the kernel map and replace
         * dso->long_name with the real pathname it found.  In this case
         * we prefer the vmlinux path like
         *   /lib/modules/3.16.4/build/vmlinux
         * rather than the build-id path (in the debug directory):
         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
         */
        symbol_conf.ignore_vmlinux_buildid = true;

        /*
         * If --buildid-all is given, it marks all DSOs regardless of hits,
         * so there is no need to process samples.
         */
        if (rec->buildid_all)
                rec->tool.sample = NULL;

        return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;
        /*
         * As for the guest kernel: when processing the record & report
         * subcommands, we arrange module mmaps prior to the guest kernel
         * mmap and trigger a preload of the dso, because by default guest
         * module symbols are loaded from guest kallsyms instead of
         * /lib/modules/XXX/XXX.  This avoids missing symbols when the
         * first address is in a module instead of in the guest kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s module"
                       " information.\n", machine->pid);

        /*
         * We use _stext for the guest kernel because the guest kernel's
         * /proc/kallsyms sometimes has no _text.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

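/*
 * PERF_RECORD_FINISHED_ROUND marks a flush point: everything written
 * before it may be sorted and processed by the session layer.
 */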
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
                                    bool backward)
{
        u64 bytes_written = rec->bytes_written;
        int i;
        int rc = 0;
        struct perf_mmap *maps;

        if (!evlist)
                return 0;

        maps = backward ? evlist->backward_mmap : evlist->mmap;
        if (!maps)
                return 0;

        if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
                return 0;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

                if (maps[i].base) {
                        if (record__mmap_read(rec, &maps[i],
                                              evlist->overwrite, backward) != 0) {
                                rc = -1;
                                goto out;
                        }
                }

                if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
                    record__auxtrace_mmap_read(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }

        /*
         * Mark the round finished in case we wrote
         * at least one event.
         */
        if (bytes_written != rec->bytes_written)
                rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

        if (backward)
                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
        return rc;
}

static int record__mmap_read_all(struct record *rec)
{
        int err;

        err = record__mmap_read_evlist(rec, rec->evlist, false);
        if (err)
                return err;

        return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
        struct perf_session *session = rec->session;
        int feat;

        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&rec->evlist->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->opts.full_auxtrace)
                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

        perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
        struct perf_data_file *file = &rec->file;
        int fd = perf_data_file__fd(file);

        if (file->is_pipe)
                return;

        rec->session->header.data_size += rec->bytes_written;
        file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

        if (!rec->no_buildid) {
                process_buildids(rec);

                if (rec->buildid_all)
                        dsos__hit_all(rec->session);
        }
        perf_session__write_header(rec->session, rec->evlist, fd, true);
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
        int err;
        struct thread_map *thread_map;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
        if (thread_map == NULL)
                return -1;

        err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
                                                process_synthesized_event,
                                                &rec->session->machines.host,
                                                rec->opts.sample_address,
                                                rec->opts.proc_map_timeout);
        thread_map__put(thread_map);
        return err;
}

static int record__synthesize(struct record *rec, bool tail);

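/*
 * Rotate the output file: synthesize tail events into the current file,
 * finalize it, then switch to a new timestamped perf.data and reset the
 * written-byte accounting (unless we are at exit).
 */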
static int
record__switch_output(struct record *rec, bool at_exit)
{
        struct perf_data_file *file = &rec->file;
        int fd, err;

        /* Same size as the timestamp "2015122520103046" */
        char timestamp[] = "InvalidTimestamp";

        record__synthesize(rec, true);
        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

        rec->samples = 0;
        record__finish_output(rec);
        err = fetch_current_timestamp(timestamp, sizeof(timestamp));
        if (err) {
                pr_err("Failed to get current timestamp\n");
                return -EINVAL;
        }

        fd = perf_data_file__switch(file, timestamp,
                                    rec->session->header.data_offset,
                                    at_exit);
        if (fd >= 0 && !at_exit) {
                rec->bytes_written = 0;
                rec->session->header.data_size = 0;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
                        file->path, timestamp);

        /* Output tracking events */
        if (!at_exit) {
                record__synthesize(rec, false);

                /*
                 * In 'perf record --switch-output' without -a,
                 * record__synthesize() in record__switch_output() won't
                 * generate tracking events because there is no thread_map
                 * in the evlist, so the newly created perf.data would not
                 * contain map and comm information.
                 * Create a fake thread_map and directly call
                 * perf_event__synthesize_thread_map() for those events.
                 */
                if (target__none(&rec->opts.target))
                        record__synthesize_workload(rec, false);
        }
        return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails,
 * since we asked for it by setting want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
                                        siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
        workload_exec_errno = info->si_value.sival_int;
        done = 1;
        child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
                            struct perf_tool *tool __maybe_unused,
                            perf_event__handler_t process __maybe_unused,
                            struct machine *machine __maybe_unused)
{
        return 0;
}

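/*
 * Pick any mmap'ed control page so the time conversion parameters can
 * be read from it; forward maps are preferred over backward ones.
 */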
static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
        if (evlist) {
                if (evlist->mmap && evlist->mmap[0].base)
                        return evlist->mmap[0].base;
                if (evlist->backward_mmap && evlist->backward_mmap[0].base)
                        return evlist->backward_mmap[0].base;
        }
        return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
        const struct perf_event_mmap_page *pc;

        pc = perf_evlist__pick_pc(rec->evlist);
        if (pc)
                return pc;
        return NULL;
}

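/*
 * Synthesize the non-sample events (attrs, tracing data, kernel and
 * module mmaps, guest machines, existing threads) that consumers of
 * perf.data need in order to resolve the samples.
 */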
static int record__synthesize(struct record *rec, bool tail)
{
        struct perf_session *session = rec->session;
        struct machine *machine = &session->machines.host;
        struct perf_data_file *file = &rec->file;
        struct record_opts *opts = &rec->opts;
        struct perf_tool *tool = &rec->tool;
        int fd = perf_data_file__fd(file);
        int err = 0;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        if (file->is_pipe) {
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out;
                }

                if (have_tracepoints(&rec->evlist->entries)) {
                        /*
                         * FIXME: err <= 0 here actually means that there
                         * were no tracepoints, so it's not really an error;
                         * we just don't need to synthesize anything.  We
                         * really have to return this more properly and also
                         * propagate errors that now are calling die().
                         */
                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out;
                        }
                        rec->bytes_written += err;
                }
        }

        err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
                                          process_synthesized_event, machine);
        if (err)
                goto out;

        if (rec->opts.full_auxtrace) {
                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
                                                           session, process_synthesized_event);
                if (err)
                        goto out;
        }

        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/modules permission or run as root.\n");

        if (perf_guest) {
                machines__process_guests(&session->machines,
                                         perf_event__synthesize_guest_os, tool);
        }

        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
                                            process_synthesized_event, opts->sample_address,
                                            opts->proc_map_timeout);
out:
        return err;
}

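/*
 * The main record loop: set up the signal handlers and the session,
 * open the events, optionally fork the workload, then alternate between
 * draining the ring buffers and polling until done.
 */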
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
        int err;
        int status = 0;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct record_opts *opts = &rec->opts;
        struct perf_data_file *file = &rec->file;
        struct perf_session *session;
        bool disabled = false, draining = false;
        int fd;

        rec->progname = argv[0];

        atexit(record__sig_exit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGTERM, sig_handler);
        signal(SIGSEGV, sigsegv_handler);

        if (rec->opts.record_namespaces)
                tool->namespace_events = true;

        if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
                signal(SIGUSR2, snapshot_sig_handler);
                if (rec->opts.auxtrace_snapshot_mode)
                        trigger_on(&auxtrace_snapshot_trigger);
                if (rec->switch_output.enabled)
                        trigger_on(&switch_output_trigger);
        } else {
                signal(SIGUSR2, SIG_IGN);
        }

        session = perf_session__new(file, false, tool);
        if (session == NULL) {
                pr_err("Perf session creation failed.\n");
                return -1;
        }

        fd = perf_data_file__fd(file);
        rec->session = session;

        record__init_features(rec);

        if (forks) {
                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
                                                    argv, file->is_pipe,
                                                    workload_exec_failed_signal);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        status = err;
                        goto out_delete_session;
                }
        }

        if (record__open(rec) != 0) {
                err = -1;
                goto out_child;
        }

        err = bpf__apply_obj_config();
        if (err) {
                char errbuf[BUFSIZ];

                bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Apply config to BPF failed: %s\n",
                       errbuf);
                goto out_child;
        }

        /*
         * Normally perf_session__new would do this, but it doesn't have the
         * evlist.
         */
        if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
                pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
                rec->tool.ordered_events = false;
        }

        if (!rec->evlist->nr_groups)
                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

        if (file->is_pipe) {
                err = perf_header__write_pipe(fd);
                if (err < 0)
                        goto out_child;
        } else {
                err = perf_session__write_header(session, rec->evlist, fd, false);
                if (err < 0)
                        goto out_child;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_child;
        }

        machine = &session->machines.host;

        err = record__synthesize(rec, false);
        if (err < 0)
                goto out_child;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_child;
                }
        }

        /*
         * When perf is starting the traced process, all the events
         * (apart from group members) have enable_on_exec=1 set,
         * so don't spoil it by prematurely enabling them.
         */
        if (!target__none(&opts->target) && !opts->initial_delay)
                perf_evlist__enable(rec->evlist);

        /*
         * Let the child rip
         */
        if (forks) {
                union perf_event *event;
                pid_t tgid;

                event = malloc(sizeof(event->comm) + machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Some H/W events are generated before the COMM event,
                 * which is emitted during exec(), so perf script cannot
                 * see a correct process name for those events.
                 * Synthesize a COMM event to prevent that.
                 */
                tgid = perf_event__synthesize_comm(tool, event,
                                                   rec->evlist->workload.pid,
                                                   process_synthesized_event,
                                                   machine);
                free(event);

                if (tgid == -1)
                        goto out_child;

                event = malloc(sizeof(event->namespaces) +
                               (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
                               machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Synthesize NAMESPACES event for the command specified.
                 */
                perf_event__synthesize_namespaces(tool, event,
                                                  rec->evlist->workload.pid,
                                                  tgid, process_synthesized_event,
                                                  machine);
                free(event);

                perf_evlist__start_workload(rec->evlist);
        }

        if (opts->initial_delay) {
                usleep(opts->initial_delay * USEC_PER_MSEC);
                perf_evlist__enable(rec->evlist);
        }

        trigger_ready(&auxtrace_snapshot_trigger);
        trigger_ready(&switch_output_trigger);
        perf_hooks__invoke_record_start();
        for (;;) {
                unsigned long long hits = rec->samples;

                /*
                 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
                 * when done == true and hits != rec->samples in the
                 * previous round.
                 *
                 * perf_evlist__toggle_bkw_mmap ensures we never convert
                 * BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
                 */
                if (trigger_is_hit(&switch_output_trigger) || done || draining)
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

                if (record__mmap_read_all(rec) < 0) {
                        trigger_error(&auxtrace_snapshot_trigger);
                        trigger_error(&switch_output_trigger);
                        err = -1;
                        goto out_child;
                }

                if (auxtrace_record__snapshot_started) {
                        auxtrace_record__snapshot_started = 0;
                        if (!trigger_is_error(&auxtrace_snapshot_trigger))
                                record__read_auxtrace_snapshot(rec);
                        if (trigger_is_error(&auxtrace_snapshot_trigger)) {
                                pr_err("AUX area tracing snapshot failed\n");
                                err = -1;
                                goto out_child;
                        }
                }

                if (trigger_is_hit(&switch_output_trigger)) {
                        /*
                         * If switch_output_trigger is hit, the data in the
                         * overwritable ring buffer should have been collected,
                         * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
                         *
                         * If SIGUSR2 is raised after or during
                         * record__mmap_read_all(), it may not have collected
                         * data from the overwritable ring buffer, so read
                         * again.
                         */
                        if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
                                continue;
                        trigger_ready(&switch_output_trigger);

                        /*
                         * Reenable events in the overwrite ring buffer after
                         * record__mmap_read_all(): we should have collected
                         * data from it.
                         */
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

                        if (!quiet)
                                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
                                        waking);
                        waking = 0;
                        fd = record__switch_output(rec, false);
                        if (fd < 0) {
                                pr_err("Failed to switch to new file\n");
                                trigger_error(&switch_output_trigger);
                                err = fd;
                                goto out_child;
                        }

                        /* re-arm the alarm */
                        if (rec->switch_output.time)
                                alarm(rec->switch_output.time);
                }

                if (hits == rec->samples) {
                        if (done || draining)
                                break;
                        err = perf_evlist__poll(rec->evlist, -1);
                        /*
                         * Propagate the error only if there is one.  Ignore a
                         * positive number of returned events and interrupt
                         * errors (EINTR).
                         */
                        if (err > 0 || (err < 0 && errno == EINTR))
                                err = 0;
                        waking++;

                        if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
                                draining = true;
                }

                /*
                 * When perf is starting the traced process, at the end events
                 * die with the process and we wait for that. Thus no need to
                 * disable events in this case.
                 */
                if (done && !disabled && !target__none(&opts->target)) {
                        trigger_off(&auxtrace_snapshot_trigger);
                        perf_evlist__disable(rec->evlist);
                        disabled = true;
                }
        }
        trigger_off(&auxtrace_snapshot_trigger);
        trigger_off(&switch_output_trigger);

        if (forks && workload_exec_errno) {
                char msg[STRERR_BUFSIZE];
                const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
                pr_err("Workload failed: %s\n", emsg);
                err = -1;
                goto out_child;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

out_child:
        if (forks) {
                int exit_status;

                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&exit_status);

                if (err < 0)
                        status = err;
                else if (WIFEXITED(exit_status))
                        status = WEXITSTATUS(exit_status);
                else if (WIFSIGNALED(exit_status))
                        signr = WTERMSIG(exit_status);
        } else
                status = err;

        record__synthesize(rec, true);
        /* this will be recalculated during process_buildids() */
        rec->samples = 0;

        if (!err) {
                if (!rec->timestamp_filename) {
                        record__finish_output(rec);
                } else {
                        fd = record__switch_output(rec, true);
                        if (fd < 0) {
                                status = fd;
                                goto out_delete_session;
                        }
                }
        }

        perf_hooks__invoke_record_end();

        if (!err && !quiet) {
                char samples[128];
                const char *postfix = rec->timestamp_filename ?
                                        ".<timestamp>" : "";

                if (rec->samples && !rec->opts.full_auxtrace)
                        scnprintf(samples, sizeof(samples),
                                  " (%" PRIu64 " samples)", rec->samples);
                else
                        samples[0] = '\0';

                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
                        perf_data_file__size(file) / 1024.0 / 1024.0,
                        file->path, postfix, samples);
        }

out_delete_session:
        perf_session__delete(session);
        return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

        pr_debug("callchain: type %s\n", str[callchain->record_mode]);

        if (callchain->record_mode == CALLCHAIN_DWARF)
                pr_debug("callchain: stack dump size %d\n",
                         callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
                                 struct callchain_param *callchain,
                                 const char *arg, bool unset)
{
        int ret;
        callchain->enabled = !unset;

        /* --no-call-graph */
        if (unset) {
                callchain->record_mode = CALLCHAIN_NONE;
                pr_debug("callchain: disabled\n");
                return 0;
        }

        ret = parse_callchain_record_opt(arg, callchain);
        if (!ret) {
                /* Enable data address sampling for DWARF unwind. */
                if (callchain->record_mode == CALLCHAIN_DWARF)
                        record->sample_address = true;
                callchain_debug(callchain);
        }

        return ret;
}

int record_parse_callchain_opt(const struct option *opt,
                               const char *arg,
                               int unset)
{
        return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
                         const char *arg __maybe_unused,
                         int unset __maybe_unused)
{
        struct callchain_param *callchain = opt->value;

        callchain->enabled = true;

        if (callchain->record_mode == CALLCHAIN_NONE)
                callchain->record_mode = CALLCHAIN_FP;

        callchain_debug(callchain);
        return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
        struct record *rec = cb;

        if (!strcmp(var, "record.build-id")) {
                if (!strcmp(value, "cache"))
                        rec->no_buildid_cache = false;
                else if (!strcmp(value, "no-cache"))
                        rec->no_buildid_cache = true;
                else if (!strcmp(value, "skip"))
                        rec->no_buildid = true;
                else
                        return -1;
                return 0;
        }
        if (!strcmp(var, "record.call-graph"))
                var = "call-graph.record-mode"; /* fall-through */

        return perf_default_config(var, value, cb);
}

struct clockid_map {
        const char *name;
        int clockid;
};

#define CLOCKID_MAP(n, c) \
        { .name = n, .clockid = (c), }

#define CLOCKID_END { .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
        /* available for all events, NMI safe */
        CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
        CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

        /* available for some events */
        CLOCKID_MAP("realtime", CLOCK_REALTIME),
        CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
        CLOCKID_MAP("tai", CLOCK_TAI),

        /* available for the lazy */
        CLOCKID_MAP("mono", CLOCK_MONOTONIC),
        CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
        CLOCKID_MAP("real", CLOCK_REALTIME),
        CLOCKID_MAP("boot", CLOCK_BOOTTIME),

        CLOCKID_END,
};

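/*
 * Parse -k/--clockid: accept a raw clockid number or a (case
 * insensitive) name from the table above, with an optional "CLOCK_"
 * prefix.
 */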
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;
        const struct clockid_map *cm;
        const char *ostr = str;

        if (unset) {
                opts->use_clockid = 0;
                return 0;
        }

        /* no arg passed */
        if (!str)
                return 0;

        /* no setting it twice */
        if (opts->use_clockid)
                return -1;

        opts->use_clockid = true;

        /* if it's a number, we're done */
        if (sscanf(str, "%d", &opts->clockid) == 1)
                return 0;

        /* allow a "CLOCK_" prefix to the name */
        if (!strncasecmp(str, "CLOCK_", 6))
                str += 6;

        for (cm = clockids; cm->name; cm++) {
                if (!strcasecmp(str, cm->name)) {
                        opts->clockid = cm->clockid;
                        return 0;
                }
        }

        opts->use_clockid = false;
        ui__warning("unknown clockid %s, check man page\n", ostr);
        return -1;
}

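/*
 * Parse -m/--mmap-pages as "pages[,pages]": the first value sizes the
 * data mmaps, the optional second one the AUX area tracing mmaps.
 */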
static int record__parse_mmap_pages(const struct option *opt,
                                    const char *str,
                                    int unset __maybe_unused)
{
        struct record_opts *opts = opt->value;
        char *s, *p;
        unsigned int mmap_pages;
        int ret;

        if (!str)
                return -EINVAL;

        s = strdup(str);
        if (!s)
                return -ENOMEM;

        p = strchr(s, ',');
        if (p)
                *p = '\0';

        if (*s) {
                ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
                if (ret)
                        goto out_free;
                opts->mmap_pages = mmap_pages;
        }

        if (!p) {
                ret = 0;
                goto out_free;
        }

        ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
        if (ret)
                goto out_free;

        opts->auxtrace_mmap_pages = mmap_pages;

out_free:
        free(s);
        return ret;
}

static void switch_output_size_warn(struct record *rec)
{
        u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
        struct switch_output *s = &rec->switch_output;

        wakeup_size /= 2;

        if (s->size < wakeup_size) {
                char buf[100];

                unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
                pr_warning("WARNING: switch-output data size is lower than the "
                           "wakeup kernel buffer size (%s); "
                           "expect bigger perf.data sizes\n", buf);
        }
}

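/*
 * Interpret the --switch-output argument as "signal", a size tag
 * (B/K/M/G) or a time tag (s/m/h/d); enabling it also turns on
 * timestamped output filenames.
 */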
static int switch_output_setup(struct record *rec)
{
        struct switch_output *s = &rec->switch_output;
        static struct parse_tag tags_size[] = {
                { .tag = 'B', .mult = 1 },
                { .tag = 'K', .mult = 1 << 10 },
                { .tag = 'M', .mult = 1 << 20 },
                { .tag = 'G', .mult = 1 << 30 },
                { .tag = 0 },
        };
        static struct parse_tag tags_time[] = {
                { .tag = 's', .mult = 1 },
                { .tag = 'm', .mult = 60 },
                { .tag = 'h', .mult = 60*60 },
                { .tag = 'd', .mult = 60*60*24 },
                { .tag = 0 },
        };
        unsigned long val;

        if (!s->set)
                return 0;

        if (!strcmp(s->str, "signal")) {
                s->signal = true;
                pr_debug("switch-output with SIGUSR2 signal\n");
                goto enabled;
        }

        val = parse_tag_value(s->str, tags_size);
        if (val != (unsigned long) -1) {
                s->size = val;
                pr_debug("switch-output with %s size threshold\n", s->str);
                goto enabled;
        }

        val = parse_tag_value(s->str, tags_time);
        if (val != (unsigned long) -1) {
                s->time = val;
                pr_debug("switch-output with %s time threshold (%lu seconds)\n",
                         s->str, s->time);
                goto enabled;
        }

        return -1;

enabled:
        rec->timestamp_filename = true;
        s->enabled = true;

        if (s->size && !rec->opts.no_buffering)
                switch_output_size_warn(rec);

        return 0;
}

static const char * const __record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, which is called
 * after cmd_record() exits, but since record_options needs to be accessible
 * to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
        .opts = {
                .sample_time = true,
                .mmap_pages = UINT_MAX,
                .user_freq = UINT_MAX,
                .user_interval = ULLONG_MAX,
                .freq = 4000,
                .target = {
                        .uses_mmap = true,
                        .default_per_cpu = true,
                },
                .proc_map_timeout = 500,
        },
        .tool = {
                .sample = process_sample_event,
                .fork = perf_event__process_fork,
                .exit = perf_event__process_exit,
                .comm = perf_event__process_comm,
                .namespaces = perf_event__process_namespaces,
                .mmap = perf_event__process_mmap,
                .mmap2 = perf_event__process_mmap2,
                .ordered_events = true,
        },
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
        "\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to using the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
        OPT_CALLBACK('e', "event", &record.evlist, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events_option),
        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
                     "event filter", parse_filter),
        OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
                           NULL, "don't record events from perf itself",
                           exclude_perf),
        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
                   "record events on existing process id"),
        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
                   "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &record.realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
        OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
                    "collect data without buffering"),
        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
                    "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
                    "system-wide collection from all CPUs"),
        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
                   "list of cpus to monitor"),
        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
        OPT_STRING('o', "output", &record.file.path, "file",
                   "output file name"),
        OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
                        &record.opts.no_inherit_set,
                        "child tasks do not inherit counters"),
        OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
                    "synthesize non-sample events at the end of output"),
        OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
        OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
                     "number of mmap data pages and AUX area tracing mmap pages",
                     record__parse_mmap_pages),
        OPT_BOOLEAN(0, "group", &record.opts.group,
                    "put the counters into a counter group"),
        OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
                           NULL, "enables call-graph recording",
                           &record_callchain_opt),
        OPT_CALLBACK(0, "call-graph", &record.opts,
                     "record_mode[,record_size]", record_callchain_help,
                     &record_parse_callchain_opt),
        OPT_INCR('v', "verbose", &verbose,
                 "be more verbose (show counter open errors, etc)"),
        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
        OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
        OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
                        &record.opts.sample_time_set,
                        "Record the sample timestamps"),
        OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
                    "don't sample"),
        OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
                        &record.no_buildid_cache_set,
                        "do not update the buildid cache"),
        OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
                        &record.no_buildid_set,
                        "do not collect buildids in perf.data"),
        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
        OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
                     "ms to wait before starting measurement after program start"),
        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
                   "user to profile"),

        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
                           "branch any", "sample any taken branches",
                           parse_branch_stack),

        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
                     "branch filter mask", "branch stack filter modes",
                     parse_branch_stack),
        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
                    "sample by weight (on special events only)"),
        OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
                    "sample transaction flags (special events only)"),
        OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
                    "use per-thread mmaps"),
        OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
                            "sample selected machine registers on interrupt,"
                            " use -I ? to list register names", parse_regs),
        OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
                    "Record running/enabled time of read (:S) events"),
        OPT_CALLBACK('k', "clockid", &record.opts,
                     "clockid", "clockid to use for events, see clock_gettime()",
                     parse_clockid),
        OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
                          "opts", "AUX area tracing Snapshot Mode", ""),
        OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
                     "per thread proc mmap processing timeout in ms"),
        OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
                    "Record namespaces events"),
        OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
                    "Record context switch events"),
        OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
                         "Configure all used events to run in kernel space.",
                         PARSE_OPT_EXCLUSIVE),
        OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
                         "Configure all used events to run in user space.",
                         PARSE_OPT_EXCLUSIVE),
        OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
                   "clang binary to use for compiling BPF scriptlets"),
        OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
                   "options passed to clang when compiling BPF scriptlets"),
        OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
                   "file", "vmlinux pathname"),
        OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
                    "Record build-id of all DSOs regardless of hits"),
        OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
                    "append timestamp to output filename"),
        OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
                              &record.switch_output.set, "signal,size,time",
                              "Switch output when receiving SIGUSR2 or when crossing the size/time threshold",
                              "signal"),
        OPT_BOOLEAN(0, "dry-run", &dry_run,
                    "Parse options then exit"),
        OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv)
{
        int err;
        struct record *rec = &record;
        char errbuf[BUFSIZ];

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
        set_nobuild('\0', "clang-path", true);
        set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON "NO_LIBBPF=1"
# else
#  define REASON "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
        set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

        rec->evlist = perf_evlist__new();
        if (rec->evlist == NULL)
                return -ENOMEM;

        err = perf_config(perf_record_config, rec);
        if (err)
                return err;

        argc = parse_options(argc, argv, record_options, record_usage,
                             PARSE_OPT_STOP_AT_NON_OPTION);
        if (quiet)
                perf_quiet_option();

        /* Make system wide (-a) the default target. */
        if (!argc && target__none(&rec->opts.target))
                rec->opts.target.system_wide = true;

        if (nr_cgroups && !rec->opts.target.system_wide) {
                usage_with_options_msg(record_usage, record_options,
                        "cgroup monitoring only available in system-wide mode");

        }
        if (rec->opts.record_switch_events &&
            !perf_can_record_switch_events()) {
                ui__error("kernel does not support recording context switch events\n");
                parse_options_usage(record_usage, record_options, "switch-events", 0);
                return -EINVAL;
        }

        if (switch_output_setup(rec)) {
                parse_options_usage(record_usage, record_options, "switch-output", 0);
                return -EINVAL;
        }

        if (rec->switch_output.time) {
                signal(SIGALRM, alarm_sig_handler);
                alarm(rec->switch_output.time);
        }

        if (!rec->itr) {
                rec->itr = auxtrace_record__init(rec->evlist, &err);
                if (err)
                        goto out;
        }

        err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
                                              rec->opts.auxtrace_snapshot_opts);
        if (err)
                goto out;

        /*
         * Allow aliases to facilitate the lookup of symbols for address
         * filters. Refer to auxtrace_parse_filters().
         */
        symbol_conf.allow_aliases = true;

        symbol__init(NULL);

        err = auxtrace_parse_filters(rec->evlist);
        if (err)
                goto out;

        if (dry_run)
                goto out;

        err = bpf__setup_stdout(rec->evlist);
        if (err) {
                bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Setup BPF stdout failed: %s\n",
                       errbuf);
                goto out;
        }

        err = -ENOMEM;

        if (symbol_conf.kptr_restrict)
                pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

        if (rec->no_buildid_cache || rec->no_buildid) {
                disable_buildid_cache();
        } else if (rec->switch_output.enabled) {
                /*
                 * In 'perf record --switch-output', disable buildid
                 * generation by default to reduce the data-file switching
                 * overhead. Still generate buildids if they are required
                 * explicitly, using
                 *
                 *  perf record --switch-output --no-no-buildid \
                 *              --no-no-buildid-cache
                 *
                 * The following code is equivalent to:
                 *
                 *  if ((rec->no_buildid || !rec->no_buildid_set) &&
                 *      (rec->no_buildid_cache || !rec->no_buildid_cache_set))
                 *          disable_buildid_cache();
                 */
                bool disable = true;

                if (rec->no_buildid_set && !rec->no_buildid)
                        disable = false;
                if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
                        disable = false;
                if (disable) {
                        rec->no_buildid = true;
                        rec->no_buildid_cache = true;
                        disable_buildid_cache();
                }
        }

        if (record.opts.overwrite)
                record.opts.tail_synthesize = true;

        if (rec->evlist->nr_entries == 0 &&
            perf_evlist__add_default(rec->evlist) < 0) {
                pr_err("Not enough memory for event selector list\n");
                goto out;
        }

        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
                rec->opts.no_inherit = true;

        err = target__validate(&rec->opts.target);
        if (err) {
                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__warning("%s", errbuf);
        }

        err = target__parse_uid(&rec->opts.target);
        if (err) {
                int saved_errno = errno;

                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__error("%s", errbuf);

                err = -saved_errno;
                goto out;
        }

        /* Enable ignoring missing threads when the -u option is given. */
        rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;

        err = -ENOMEM;
        if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
                usage_with_options(record_usage, record_options);

        err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
        if (err)
                goto out;

        /*
         * We take all buildids when the file contains AUX area tracing data
         * because we do not decode the trace, as decoding it would take too
         * long.
         */
        if (rec->opts.full_auxtrace)
                rec->buildid_all = true;

        if (record_opts__config(&rec->opts)) {
                err = -EINVAL;
                goto out;
        }

        err = __cmd_record(&record, argc, argv);
out:
        perf_evlist__delete(rec->evlist);
        symbol__exit();
        auxtrace_record__free(rec->itr);
        return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
        struct record *rec = &record;

        if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
                trigger_hit(&auxtrace_snapshot_trigger);
                auxtrace_record__snapshot_started = 1;
                if (auxtrace_record__snapshot_start(record.itr))
                        trigger_error(&auxtrace_snapshot_trigger);
        }

        if (switch_output_signal(rec))
                trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
        struct record *rec = &record;

        if (switch_output_time(rec))
                trigger_hit(&switch_output_trigger);
}