1 // SPDX-License-Identifier: GPL-2.0
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include "util/parse-events.h"
14 #include "util/config.h"
16 #include "util/callchain.h"
17 #include "util/cgroup.h"
18 #include "util/header.h"
19 #include "util/event.h"
20 #include "util/evlist.h"
21 #include "util/evsel.h"
22 #include "util/debug.h"
23 #include "util/mmap.h"
24 #include "util/target.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/record.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "util/cpu-set-sched.h"
42 #include "util/synthetic-events.h"
43 #include "util/time-utils.h"
44 #include "util/units.h"
45 #include "util/bpf-event.h"
58 #include <sys/types.h>
61 #include <linux/err.h>
62 #include <linux/string.h>
63 #include <linux/time64.h>
64 #include <linux/zalloc.h>
66 struct switch_output
{
79 struct perf_tool tool
;
80 struct record_opts opts
;
82 struct perf_data data
;
83 struct auxtrace_record
*itr
;
84 struct evlist
*evlist
;
85 struct perf_session
*session
;
89 bool no_buildid_cache
;
90 bool no_buildid_cache_set
;
92 bool timestamp_filename
;
93 bool timestamp_boundary
;
94 struct switch_output switch_output
;
95 unsigned long long samples
;
96 cpu_set_t affinity_mask
;
97 unsigned long output_max_size
; /* = 0: unlimited */
100 static volatile int done
;
102 static volatile int auxtrace_record__snapshot_started
;
103 static DEFINE_TRIGGER(auxtrace_snapshot_trigger
);
104 static DEFINE_TRIGGER(switch_output_trigger
);
106 static const char *affinity_tags
[PERF_AFFINITY_MAX
] = {
110 static bool switch_output_signal(struct record
*rec
)
112 return rec
->switch_output
.signal
&&
113 trigger_is_ready(&switch_output_trigger
);
116 static bool switch_output_size(struct record
*rec
)
118 return rec
->switch_output
.size
&&
119 trigger_is_ready(&switch_output_trigger
) &&
120 (rec
->bytes_written
>= rec
->switch_output
.size
);
123 static bool switch_output_time(struct record
*rec
)
125 return rec
->switch_output
.time
&&
126 trigger_is_ready(&switch_output_trigger
);
129 static bool record__output_max_size_exceeded(struct record
*rec
)
131 return rec
->output_max_size
&&
132 (rec
->bytes_written
>= rec
->output_max_size
);
135 static int record__write(struct record
*rec
, struct mmap
*map __maybe_unused
,
136 void *bf
, size_t size
)
138 struct perf_data_file
*file
= &rec
->session
->data
->file
;
140 if (perf_data_file__write(file
, bf
, size
) < 0) {
141 pr_err("failed to write perf data, error: %m\n");
145 rec
->bytes_written
+= size
;
147 if (record__output_max_size_exceeded(rec
) && !done
) {
148 fprintf(stderr
, "[ perf record: perf size limit reached (%" PRIu64
" KB),"
149 " stopping session ]\n",
150 rec
->bytes_written
>> 10);
154 if (switch_output_size(rec
))
155 trigger_hit(&switch_output_trigger
);
160 static int record__aio_enabled(struct record
*rec
);
161 static int record__comp_enabled(struct record
*rec
);
162 static size_t zstd_compress(struct perf_session
*session
, void *dst
, size_t dst_size
,
163 void *src
, size_t src_size
);
165 #ifdef HAVE_AIO_SUPPORT
166 static int record__aio_write(struct aiocb
*cblock
, int trace_fd
,
167 void *buf
, size_t size
, off_t off
)
171 cblock
->aio_fildes
= trace_fd
;
172 cblock
->aio_buf
= buf
;
173 cblock
->aio_nbytes
= size
;
174 cblock
->aio_offset
= off
;
175 cblock
->aio_sigevent
.sigev_notify
= SIGEV_NONE
;
178 rc
= aio_write(cblock
);
181 } else if (errno
!= EAGAIN
) {
182 cblock
->aio_fildes
= -1;
183 pr_err("failed to queue perf data, error: %m\n");
191 static int record__aio_complete(struct mmap
*md
, struct aiocb
*cblock
)
197 ssize_t aio_ret
, written
;
199 aio_errno
= aio_error(cblock
);
200 if (aio_errno
== EINPROGRESS
)
203 written
= aio_ret
= aio_return(cblock
);
205 if (aio_errno
!= EINTR
)
206 pr_err("failed to write perf data, error: %m\n");
210 rem_size
= cblock
->aio_nbytes
- written
;
213 cblock
->aio_fildes
= -1;
215 * md->refcount is incremented in record__aio_pushfn() for
216 * every aio write request started in record__aio_push() so
217 * decrement it because the request is now complete.
219 perf_mmap__put(&md
->core
);
223 * aio write request may require restart with the
224 * reminder if the kernel didn't write whole
227 rem_off
= cblock
->aio_offset
+ written
;
228 rem_buf
= (void *)(cblock
->aio_buf
+ written
);
229 record__aio_write(cblock
, cblock
->aio_fildes
,
230 rem_buf
, rem_size
, rem_off
);
237 static int record__aio_sync(struct mmap
*md
, bool sync_all
)
239 struct aiocb
**aiocb
= md
->aio
.aiocb
;
240 struct aiocb
*cblocks
= md
->aio
.cblocks
;
241 struct timespec timeout
= { 0, 1000 * 1000 * 1 }; /* 1ms */
246 for (i
= 0; i
< md
->aio
.nr_cblocks
; ++i
) {
247 if (cblocks
[i
].aio_fildes
== -1 || record__aio_complete(md
, &cblocks
[i
])) {
254 * Started aio write is not complete yet
255 * so it has to be waited before the
258 aiocb
[i
] = &cblocks
[i
];
265 while (aio_suspend((const struct aiocb
**)aiocb
, md
->aio
.nr_cblocks
, &timeout
)) {
266 if (!(errno
== EAGAIN
|| errno
== EINTR
))
267 pr_err("failed to sync perf data, error: %m\n");
278 static int record__aio_pushfn(struct mmap
*map
, void *to
, void *buf
, size_t size
)
280 struct record_aio
*aio
= to
;
283 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
284 * to release space in the kernel buffer as fast as possible, calling
285 * perf_mmap__consume() from perf_mmap__push() function.
287 * That lets the kernel to proceed with storing more profiling data into
288 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
290 * Coping can be done in two steps in case the chunk of profiling data
291 * crosses the upper bound of the kernel buffer. In this case we first move
292 * part of data from map->start till the upper bound and then the reminder
293 * from the beginning of the kernel buffer till the end of the data chunk.
296 if (record__comp_enabled(aio
->rec
)) {
297 size
= zstd_compress(aio
->rec
->session
, aio
->data
+ aio
->size
,
298 mmap__mmap_len(map
) - aio
->size
,
301 memcpy(aio
->data
+ aio
->size
, buf
, size
);
306 * Increment map->refcount to guard map->aio.data[] buffer
307 * from premature deallocation because map object can be
308 * released earlier than aio write request started on
309 * map->aio.data[] buffer is complete.
311 * perf_mmap__put() is done at record__aio_complete()
312 * after started aio request completion or at record__aio_push()
313 * if the request failed to start.
315 perf_mmap__get(&map
->core
);
323 static int record__aio_push(struct record
*rec
, struct mmap
*map
, off_t
*off
)
326 int trace_fd
= rec
->session
->data
->file
.fd
;
327 struct record_aio aio
= { .rec
= rec
, .size
= 0 };
330 * Call record__aio_sync() to wait till map->aio.data[] buffer
331 * becomes available after previous aio write operation.
334 idx
= record__aio_sync(map
, false);
335 aio
.data
= map
->aio
.data
[idx
];
336 ret
= perf_mmap__push(map
, &aio
, record__aio_pushfn
);
337 if (ret
!= 0) /* ret > 0 - no data, ret < 0 - error */
341 ret
= record__aio_write(&(map
->aio
.cblocks
[idx
]), trace_fd
, aio
.data
, aio
.size
, *off
);
344 rec
->bytes_written
+= aio
.size
;
345 if (switch_output_size(rec
))
346 trigger_hit(&switch_output_trigger
);
349 * Decrement map->refcount incremented in record__aio_pushfn()
350 * back if record__aio_write() operation failed to start, otherwise
351 * map->refcount is decremented in record__aio_complete() after
352 * aio write operation finishes successfully.
354 perf_mmap__put(&map
->core
);
/* Return the current write offset of the trace file descriptor. */
static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}
/* Reposition the trace file descriptor to an absolute offset. */
static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}
370 static void record__aio_mmap_read_sync(struct record
*rec
)
373 struct evlist
*evlist
= rec
->evlist
;
374 struct mmap
*maps
= evlist
->mmap
;
376 if (!record__aio_enabled(rec
))
379 for (i
= 0; i
< evlist
->core
.nr_mmaps
; i
++) {
380 struct mmap
*map
= &maps
[i
];
383 record__aio_sync(map
, true);
387 static int nr_cblocks_default
= 1;
388 static int nr_cblocks_max
= 4;
390 static int record__aio_parse(const struct option
*opt
,
394 struct record_opts
*opts
= (struct record_opts
*)opt
->value
;
397 opts
->nr_cblocks
= 0;
400 opts
->nr_cblocks
= strtol(str
, NULL
, 0);
401 if (!opts
->nr_cblocks
)
402 opts
->nr_cblocks
= nr_cblocks_default
;
407 #else /* HAVE_AIO_SUPPORT */
408 static int nr_cblocks_max
= 0;
410 static int record__aio_push(struct record
*rec __maybe_unused
, struct mmap
*map __maybe_unused
,
411 off_t
*off __maybe_unused
)
416 static off_t
record__aio_get_pos(int trace_fd __maybe_unused
)
421 static void record__aio_set_pos(int trace_fd __maybe_unused
, off_t pos __maybe_unused
)
425 static void record__aio_mmap_read_sync(struct record
*rec __maybe_unused
)
430 static int record__aio_enabled(struct record
*rec
)
432 return rec
->opts
.nr_cblocks
> 0;
435 #define MMAP_FLUSH_DEFAULT 1
436 static int record__mmap_flush_parse(const struct option
*opt
,
441 struct record_opts
*opts
= (struct record_opts
*)opt
->value
;
442 static struct parse_tag tags
[] = {
443 { .tag
= 'B', .mult
= 1 },
444 { .tag
= 'K', .mult
= 1 << 10 },
445 { .tag
= 'M', .mult
= 1 << 20 },
446 { .tag
= 'G', .mult
= 1 << 30 },
454 opts
->mmap_flush
= parse_tag_value(str
, tags
);
455 if (opts
->mmap_flush
== (int)-1)
456 opts
->mmap_flush
= strtol(str
, NULL
, 0);
459 if (!opts
->mmap_flush
)
460 opts
->mmap_flush
= MMAP_FLUSH_DEFAULT
;
462 flush_max
= evlist__mmap_size(opts
->mmap_pages
);
464 if (opts
->mmap_flush
> flush_max
)
465 opts
->mmap_flush
= flush_max
;
470 #ifdef HAVE_ZSTD_SUPPORT
471 static unsigned int comp_level_default
= 1;
473 static int record__parse_comp_level(const struct option
*opt
, const char *str
, int unset
)
475 struct record_opts
*opts
= opt
->value
;
478 opts
->comp_level
= 0;
481 opts
->comp_level
= strtol(str
, NULL
, 0);
482 if (!opts
->comp_level
)
483 opts
->comp_level
= comp_level_default
;
489 static unsigned int comp_level_max
= 22;
491 static int record__comp_enabled(struct record
*rec
)
493 return rec
->opts
.comp_level
> 0;
496 static int process_synthesized_event(struct perf_tool
*tool
,
497 union perf_event
*event
,
498 struct perf_sample
*sample __maybe_unused
,
499 struct machine
*machine __maybe_unused
)
501 struct record
*rec
= container_of(tool
, struct record
, tool
);
502 return record__write(rec
, NULL
, event
, event
->header
.size
);
505 static int record__pushfn(struct mmap
*map
, void *to
, void *bf
, size_t size
)
507 struct record
*rec
= to
;
509 if (record__comp_enabled(rec
)) {
510 size
= zstd_compress(rec
->session
, map
->data
, mmap__mmap_len(map
), bf
, size
);
515 return record__write(rec
, map
, bf
, size
);
518 static volatile int signr
= -1;
519 static volatile int child_finished
;
521 static void sig_handler(int sig
)
531 static void sigsegv_handler(int sig
)
533 perf_hooks__recover();
534 sighandler_dump_stack(sig
);
537 static void record__sig_exit(void)
542 signal(signr
, SIG_DFL
);
546 #ifdef HAVE_AUXTRACE_SUPPORT
548 static int record__process_auxtrace(struct perf_tool
*tool
,
550 union perf_event
*event
, void *data1
,
551 size_t len1
, void *data2
, size_t len2
)
553 struct record
*rec
= container_of(tool
, struct record
, tool
);
554 struct perf_data
*data
= &rec
->data
;
558 if (!perf_data__is_pipe(data
) && perf_data__is_single_file(data
)) {
560 int fd
= perf_data__fd(data
);
563 file_offset
= lseek(fd
, 0, SEEK_CUR
);
564 if (file_offset
== -1)
566 err
= auxtrace_index__auxtrace_event(&rec
->session
->auxtrace_index
,
572 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
573 padding
= (len1
+ len2
) & 7;
575 padding
= 8 - padding
;
577 record__write(rec
, map
, event
, event
->header
.size
);
578 record__write(rec
, map
, data1
, len1
);
580 record__write(rec
, map
, data2
, len2
);
581 record__write(rec
, map
, &pad
, padding
);
586 static int record__auxtrace_mmap_read(struct record
*rec
,
591 ret
= auxtrace_mmap__read(map
, rec
->itr
, &rec
->tool
,
592 record__process_auxtrace
);
602 static int record__auxtrace_mmap_read_snapshot(struct record
*rec
,
607 ret
= auxtrace_mmap__read_snapshot(map
, rec
->itr
, &rec
->tool
,
608 record__process_auxtrace
,
609 rec
->opts
.auxtrace_snapshot_size
);
619 static int record__auxtrace_read_snapshot_all(struct record
*rec
)
624 for (i
= 0; i
< rec
->evlist
->core
.nr_mmaps
; i
++) {
625 struct mmap
*map
= &rec
->evlist
->mmap
[i
];
627 if (!map
->auxtrace_mmap
.base
)
630 if (record__auxtrace_mmap_read_snapshot(rec
, map
) != 0) {
639 static void record__read_auxtrace_snapshot(struct record
*rec
, bool on_exit
)
641 pr_debug("Recording AUX area tracing snapshot\n");
642 if (record__auxtrace_read_snapshot_all(rec
) < 0) {
643 trigger_error(&auxtrace_snapshot_trigger
);
645 if (auxtrace_record__snapshot_finish(rec
->itr
, on_exit
))
646 trigger_error(&auxtrace_snapshot_trigger
);
648 trigger_ready(&auxtrace_snapshot_trigger
);
652 static int record__auxtrace_snapshot_exit(struct record
*rec
)
654 if (trigger_is_error(&auxtrace_snapshot_trigger
))
657 if (!auxtrace_record__snapshot_started
&&
658 auxtrace_record__snapshot_start(rec
->itr
))
661 record__read_auxtrace_snapshot(rec
, true);
662 if (trigger_is_error(&auxtrace_snapshot_trigger
))
668 static int record__auxtrace_init(struct record
*rec
)
673 rec
->itr
= auxtrace_record__init(rec
->evlist
, &err
);
678 err
= auxtrace_parse_snapshot_options(rec
->itr
, &rec
->opts
,
679 rec
->opts
.auxtrace_snapshot_opts
);
683 err
= auxtrace_parse_sample_options(rec
->itr
, rec
->evlist
, &rec
->opts
,
684 rec
->opts
.auxtrace_sample_opts
);
688 return auxtrace_parse_filters(rec
->evlist
);
694 int record__auxtrace_mmap_read(struct record
*rec __maybe_unused
,
695 struct mmap
*map __maybe_unused
)
701 void record__read_auxtrace_snapshot(struct record
*rec __maybe_unused
,
702 bool on_exit __maybe_unused
)
707 int auxtrace_record__snapshot_start(struct auxtrace_record
*itr __maybe_unused
)
713 int record__auxtrace_snapshot_exit(struct record
*rec __maybe_unused
)
718 static int record__auxtrace_init(struct record
*rec __maybe_unused
)
725 static bool record__kcore_readable(struct machine
*machine
)
727 char kcore
[PATH_MAX
];
730 scnprintf(kcore
, sizeof(kcore
), "%s/proc/kcore", machine
->root_dir
);
732 fd
= open(kcore
, O_RDONLY
);
741 static int record__kcore_copy(struct machine
*machine
, struct perf_data
*data
)
743 char from_dir
[PATH_MAX
];
744 char kcore_dir
[PATH_MAX
];
747 snprintf(from_dir
, sizeof(from_dir
), "%s/proc", machine
->root_dir
);
749 ret
= perf_data__make_kcore_dir(data
, kcore_dir
, sizeof(kcore_dir
));
753 return kcore_copy(from_dir
, kcore_dir
);
756 static int record__mmap_evlist(struct record
*rec
,
757 struct evlist
*evlist
)
759 struct record_opts
*opts
= &rec
->opts
;
760 bool auxtrace_overwrite
= opts
->auxtrace_snapshot_mode
||
761 opts
->auxtrace_sample_mode
;
764 if (opts
->affinity
!= PERF_AFFINITY_SYS
)
765 cpu__setup_cpunode_map();
767 if (evlist__mmap_ex(evlist
, opts
->mmap_pages
,
768 opts
->auxtrace_mmap_pages
,
770 opts
->nr_cblocks
, opts
->affinity
,
771 opts
->mmap_flush
, opts
->comp_level
) < 0) {
772 if (errno
== EPERM
) {
773 pr_err("Permission error mapping pages.\n"
774 "Consider increasing "
775 "/proc/sys/kernel/perf_event_mlock_kb,\n"
776 "or try again with a smaller value of -m/--mmap_pages.\n"
777 "(current value: %u,%u)\n",
778 opts
->mmap_pages
, opts
->auxtrace_mmap_pages
);
781 pr_err("failed to mmap with %d (%s)\n", errno
,
782 str_error_r(errno
, msg
, sizeof(msg
)));
792 static int record__mmap(struct record
*rec
)
794 return record__mmap_evlist(rec
, rec
->evlist
);
797 static int record__open(struct record
*rec
)
801 struct evlist
*evlist
= rec
->evlist
;
802 struct perf_session
*session
= rec
->session
;
803 struct record_opts
*opts
= &rec
->opts
;
807 * For initial_delay we need to add a dummy event so that we can track
808 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
809 * real events, the ones asked by the user.
811 if (opts
->initial_delay
) {
812 if (perf_evlist__add_dummy(evlist
))
815 pos
= evlist__first(evlist
);
817 pos
= evlist__last(evlist
);
819 pos
->core
.attr
.enable_on_exec
= 1;
822 perf_evlist__config(evlist
, opts
, &callchain_param
);
824 evlist__for_each_entry(evlist
, pos
) {
826 if (evsel__open(pos
, pos
->core
.cpus
, pos
->core
.threads
) < 0) {
827 if (perf_evsel__fallback(pos
, errno
, msg
, sizeof(msg
))) {
829 ui__warning("%s\n", msg
);
832 if ((errno
== EINVAL
|| errno
== EBADF
) &&
833 pos
->leader
!= pos
&&
835 pos
= perf_evlist__reset_weak_group(evlist
, pos
);
839 perf_evsel__open_strerror(pos
, &opts
->target
,
840 errno
, msg
, sizeof(msg
));
841 ui__error("%s\n", msg
);
845 pos
->supported
= true;
848 if (symbol_conf
.kptr_restrict
&& !perf_evlist__exclude_kernel(evlist
)) {
850 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
851 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
852 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
853 "file is not found in the buildid cache or in the vmlinux path.\n\n"
854 "Samples in kernel modules won't be resolved at all.\n\n"
855 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
856 "even with a suitable vmlinux or kallsyms file.\n\n");
859 if (perf_evlist__apply_filters(evlist
, &pos
)) {
860 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
861 pos
->filter
, perf_evsel__name(pos
), errno
,
862 str_error_r(errno
, msg
, sizeof(msg
)));
867 rc
= record__mmap(rec
);
871 session
->evlist
= evlist
;
872 perf_session__set_id_hdr_size(session
);
877 static int process_sample_event(struct perf_tool
*tool
,
878 union perf_event
*event
,
879 struct perf_sample
*sample
,
881 struct machine
*machine
)
883 struct record
*rec
= container_of(tool
, struct record
, tool
);
885 if (rec
->evlist
->first_sample_time
== 0)
886 rec
->evlist
->first_sample_time
= sample
->time
;
888 rec
->evlist
->last_sample_time
= sample
->time
;
890 if (rec
->buildid_all
)
894 return build_id__mark_dso_hit(tool
, event
, sample
, evsel
, machine
);
897 static int process_buildids(struct record
*rec
)
899 struct perf_session
*session
= rec
->session
;
901 if (perf_data__size(&rec
->data
) == 0)
905 * During this process, it'll load kernel map and replace the
906 * dso->long_name to a real pathname it found. In this case
907 * we prefer the vmlinux path like
908 * /lib/modules/3.16.4/build/vmlinux
910 * rather than build-id path (in debug directory).
911 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
913 symbol_conf
.ignore_vmlinux_buildid
= true;
916 * If --buildid-all is given, it marks all DSO regardless of hits,
917 * so no need to process samples. But if timestamp_boundary is enabled,
918 * it still needs to walk on all samples to get the timestamps of
919 * first/last samples.
921 if (rec
->buildid_all
&& !rec
->timestamp_boundary
)
922 rec
->tool
.sample
= NULL
;
924 return perf_session__process_events(session
);
927 static void perf_event__synthesize_guest_os(struct machine
*machine
, void *data
)
930 struct perf_tool
*tool
= data
;
932 *As for guest kernel when processing subcommand record&report,
933 *we arrange module mmap prior to guest kernel mmap and trigger
934 *a preload dso because default guest module symbols are loaded
935 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
936 *method is used to avoid symbol missing when the first addr is
937 *in module instead of in guest kernel.
939 err
= perf_event__synthesize_modules(tool
, process_synthesized_event
,
942 pr_err("Couldn't record guest kernel [%d]'s reference"
943 " relocation symbol.\n", machine
->pid
);
946 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
947 * have no _text sometimes.
949 err
= perf_event__synthesize_kernel_mmap(tool
, process_synthesized_event
,
952 pr_err("Couldn't record guest kernel [%d]'s reference"
953 " relocation symbol.\n", machine
->pid
);
956 static struct perf_event_header finished_round_event
= {
957 .size
= sizeof(struct perf_event_header
),
958 .type
= PERF_RECORD_FINISHED_ROUND
,
961 static void record__adjust_affinity(struct record
*rec
, struct mmap
*map
)
963 if (rec
->opts
.affinity
!= PERF_AFFINITY_SYS
&&
964 !CPU_EQUAL(&rec
->affinity_mask
, &map
->affinity_mask
)) {
965 CPU_ZERO(&rec
->affinity_mask
);
966 CPU_OR(&rec
->affinity_mask
, &rec
->affinity_mask
, &map
->affinity_mask
);
967 sched_setaffinity(0, sizeof(rec
->affinity_mask
), &rec
->affinity_mask
);
971 static size_t process_comp_header(void *record
, size_t increment
)
973 struct perf_record_compressed
*event
= record
;
974 size_t size
= sizeof(*event
);
977 event
->header
.size
+= increment
;
981 event
->header
.type
= PERF_RECORD_COMPRESSED
;
982 event
->header
.size
= size
;
987 static size_t zstd_compress(struct perf_session
*session
, void *dst
, size_t dst_size
,
988 void *src
, size_t src_size
)
991 size_t max_record_size
= PERF_SAMPLE_MAX_SIZE
- sizeof(struct perf_record_compressed
) - 1;
993 compressed
= zstd_compress_stream_to_records(&session
->zstd_data
, dst
, dst_size
, src
, src_size
,
994 max_record_size
, process_comp_header
);
996 session
->bytes_transferred
+= src_size
;
997 session
->bytes_compressed
+= compressed
;
1002 static int record__mmap_read_evlist(struct record
*rec
, struct evlist
*evlist
,
1003 bool overwrite
, bool synch
)
1005 u64 bytes_written
= rec
->bytes_written
;
1009 int trace_fd
= rec
->data
.file
.fd
;
1015 maps
= overwrite
? evlist
->overwrite_mmap
: evlist
->mmap
;
1019 if (overwrite
&& evlist
->bkw_mmap_state
!= BKW_MMAP_DATA_PENDING
)
1022 if (record__aio_enabled(rec
))
1023 off
= record__aio_get_pos(trace_fd
);
1025 for (i
= 0; i
< evlist
->core
.nr_mmaps
; i
++) {
1027 struct mmap
*map
= &maps
[i
];
1029 if (map
->core
.base
) {
1030 record__adjust_affinity(rec
, map
);
1032 flush
= map
->core
.flush
;
1033 map
->core
.flush
= 1;
1035 if (!record__aio_enabled(rec
)) {
1036 if (perf_mmap__push(map
, rec
, record__pushfn
) < 0) {
1038 map
->core
.flush
= flush
;
1043 if (record__aio_push(rec
, map
, &off
) < 0) {
1044 record__aio_set_pos(trace_fd
, off
);
1046 map
->core
.flush
= flush
;
1052 map
->core
.flush
= flush
;
1055 if (map
->auxtrace_mmap
.base
&& !rec
->opts
.auxtrace_snapshot_mode
&&
1056 !rec
->opts
.auxtrace_sample_mode
&&
1057 record__auxtrace_mmap_read(rec
, map
) != 0) {
1063 if (record__aio_enabled(rec
))
1064 record__aio_set_pos(trace_fd
, off
);
1067 * Mark the round finished in case we wrote
1068 * at least one event.
1070 if (bytes_written
!= rec
->bytes_written
)
1071 rc
= record__write(rec
, NULL
, &finished_round_event
, sizeof(finished_round_event
));
1074 perf_evlist__toggle_bkw_mmap(evlist
, BKW_MMAP_EMPTY
);
1079 static int record__mmap_read_all(struct record
*rec
, bool synch
)
1083 err
= record__mmap_read_evlist(rec
, rec
->evlist
, false, synch
);
1087 return record__mmap_read_evlist(rec
, rec
->evlist
, true, synch
);
1090 static void record__init_features(struct record
*rec
)
1092 struct perf_session
*session
= rec
->session
;
1095 for (feat
= HEADER_FIRST_FEATURE
; feat
< HEADER_LAST_FEATURE
; feat
++)
1096 perf_header__set_feat(&session
->header
, feat
);
1098 if (rec
->no_buildid
)
1099 perf_header__clear_feat(&session
->header
, HEADER_BUILD_ID
);
1101 if (!have_tracepoints(&rec
->evlist
->core
.entries
))
1102 perf_header__clear_feat(&session
->header
, HEADER_TRACING_DATA
);
1104 if (!rec
->opts
.branch_stack
)
1105 perf_header__clear_feat(&session
->header
, HEADER_BRANCH_STACK
);
1107 if (!rec
->opts
.full_auxtrace
)
1108 perf_header__clear_feat(&session
->header
, HEADER_AUXTRACE
);
1110 if (!(rec
->opts
.use_clockid
&& rec
->opts
.clockid_res_ns
))
1111 perf_header__clear_feat(&session
->header
, HEADER_CLOCKID
);
1113 perf_header__clear_feat(&session
->header
, HEADER_DIR_FORMAT
);
1114 if (!record__comp_enabled(rec
))
1115 perf_header__clear_feat(&session
->header
, HEADER_COMPRESSED
);
1117 perf_header__clear_feat(&session
->header
, HEADER_STAT
);
1121 record__finish_output(struct record
*rec
)
1123 struct perf_data
*data
= &rec
->data
;
1124 int fd
= perf_data__fd(data
);
1129 rec
->session
->header
.data_size
+= rec
->bytes_written
;
1130 data
->file
.size
= lseek(perf_data__fd(data
), 0, SEEK_CUR
);
1132 if (!rec
->no_buildid
) {
1133 process_buildids(rec
);
1135 if (rec
->buildid_all
)
1136 dsos__hit_all(rec
->session
);
1138 perf_session__write_header(rec
->session
, rec
->evlist
, fd
, true);
1143 static int record__synthesize_workload(struct record
*rec
, bool tail
)
1146 struct perf_thread_map
*thread_map
;
1148 if (rec
->opts
.tail_synthesize
!= tail
)
1151 thread_map
= thread_map__new_by_tid(rec
->evlist
->workload
.pid
);
1152 if (thread_map
== NULL
)
1155 err
= perf_event__synthesize_thread_map(&rec
->tool
, thread_map
,
1156 process_synthesized_event
,
1157 &rec
->session
->machines
.host
,
1158 rec
->opts
.sample_address
);
1159 perf_thread_map__put(thread_map
);
1163 static int record__synthesize(struct record
*rec
, bool tail
);
1166 record__switch_output(struct record
*rec
, bool at_exit
)
1168 struct perf_data
*data
= &rec
->data
;
1172 /* Same Size: "2015122520103046"*/
1173 char timestamp
[] = "InvalidTimestamp";
1175 record__aio_mmap_read_sync(rec
);
1177 record__synthesize(rec
, true);
1178 if (target__none(&rec
->opts
.target
))
1179 record__synthesize_workload(rec
, true);
1182 record__finish_output(rec
);
1183 err
= fetch_current_timestamp(timestamp
, sizeof(timestamp
));
1185 pr_err("Failed to get current timestamp\n");
1189 fd
= perf_data__switch(data
, timestamp
,
1190 rec
->session
->header
.data_offset
,
1191 at_exit
, &new_filename
);
1192 if (fd
>= 0 && !at_exit
) {
1193 rec
->bytes_written
= 0;
1194 rec
->session
->header
.data_size
= 0;
1198 fprintf(stderr
, "[ perf record: Dump %s.%s ]\n",
1199 data
->path
, timestamp
);
1201 if (rec
->switch_output
.num_files
) {
1202 int n
= rec
->switch_output
.cur_file
+ 1;
1204 if (n
>= rec
->switch_output
.num_files
)
1206 rec
->switch_output
.cur_file
= n
;
1207 if (rec
->switch_output
.filenames
[n
]) {
1208 remove(rec
->switch_output
.filenames
[n
]);
1209 zfree(&rec
->switch_output
.filenames
[n
]);
1211 rec
->switch_output
.filenames
[n
] = new_filename
;
1216 /* Output tracking events */
1218 record__synthesize(rec
, false);
1221 * In 'perf record --switch-output' without -a,
1222 * record__synthesize() in record__switch_output() won't
1223 * generate tracking events because there's no thread_map
1224 * in evlist. Which causes newly created perf.data doesn't
1225 * contain map and comm information.
1226 * Create a fake thread_map and directly call
1227 * perf_event__synthesize_thread_map() for those events.
1229 if (target__none(&rec
->opts
.target
))
1230 record__synthesize_workload(rec
, false);
1235 static volatile int workload_exec_errno
;
1238 * perf_evlist__prepare_workload will send a SIGUSR1
1239 * if the fork fails, since we asked by setting its
1240 * want_signal to true.
1242 static void workload_exec_failed_signal(int signo __maybe_unused
,
1244 void *ucontext __maybe_unused
)
1246 workload_exec_errno
= info
->si_value
.sival_int
;
1251 static void snapshot_sig_handler(int sig
);
1252 static void alarm_sig_handler(int sig
);
1254 static const struct perf_event_mmap_page
*
1255 perf_evlist__pick_pc(struct evlist
*evlist
)
1258 if (evlist
->mmap
&& evlist
->mmap
[0].core
.base
)
1259 return evlist
->mmap
[0].core
.base
;
1260 if (evlist
->overwrite_mmap
&& evlist
->overwrite_mmap
[0].core
.base
)
1261 return evlist
->overwrite_mmap
[0].core
.base
;
1266 static const struct perf_event_mmap_page
*record__pick_pc(struct record
*rec
)
1268 const struct perf_event_mmap_page
*pc
;
1270 pc
= perf_evlist__pick_pc(rec
->evlist
);
1276 static int record__synthesize(struct record
*rec
, bool tail
)
1278 struct perf_session
*session
= rec
->session
;
1279 struct machine
*machine
= &session
->machines
.host
;
1280 struct perf_data
*data
= &rec
->data
;
1281 struct record_opts
*opts
= &rec
->opts
;
1282 struct perf_tool
*tool
= &rec
->tool
;
1283 int fd
= perf_data__fd(data
);
1286 if (rec
->opts
.tail_synthesize
!= tail
)
1289 if (data
->is_pipe
) {
1291 * We need to synthesize events first, because some
1292 * features works on top of them (on report side).
1294 err
= perf_event__synthesize_attrs(tool
, rec
->evlist
,
1295 process_synthesized_event
);
1297 pr_err("Couldn't synthesize attrs.\n");
1301 err
= perf_event__synthesize_features(tool
, session
, rec
->evlist
,
1302 process_synthesized_event
);
1304 pr_err("Couldn't synthesize features.\n");
1308 if (have_tracepoints(&rec
->evlist
->core
.entries
)) {
1310 * FIXME err <= 0 here actually means that
1311 * there were no tracepoints so its not really
1312 * an error, just that we don't need to
1313 * synthesize anything. We really have to
1314 * return this more properly and also
1315 * propagate errors that now are calling die()
1317 err
= perf_event__synthesize_tracing_data(tool
, fd
, rec
->evlist
,
1318 process_synthesized_event
);
1320 pr_err("Couldn't record tracing data.\n");
1323 rec
->bytes_written
+= err
;
1327 err
= perf_event__synth_time_conv(record__pick_pc(rec
), tool
,
1328 process_synthesized_event
, machine
);
1332 /* Synthesize id_index before auxtrace_info */
1333 if (rec
->opts
.auxtrace_sample_mode
) {
1334 err
= perf_event__synthesize_id_index(tool
,
1335 process_synthesized_event
,
1336 session
->evlist
, machine
);
1341 if (rec
->opts
.full_auxtrace
) {
1342 err
= perf_event__synthesize_auxtrace_info(rec
->itr
, tool
,
1343 session
, process_synthesized_event
);
1348 if (!perf_evlist__exclude_kernel(rec
->evlist
)) {
1349 err
= perf_event__synthesize_kernel_mmap(tool
, process_synthesized_event
,
1351 WARN_ONCE(err
< 0, "Couldn't record kernel reference relocation symbol\n"
1352 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1353 "Check /proc/kallsyms permission or run as root.\n");
1355 err
= perf_event__synthesize_modules(tool
, process_synthesized_event
,
1357 WARN_ONCE(err
< 0, "Couldn't record kernel module information.\n"
1358 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1359 "Check /proc/modules permission or run as root.\n");
1363 machines__process_guests(&session
->machines
,
1364 perf_event__synthesize_guest_os
, tool
);
1367 err
= perf_event__synthesize_extra_attr(&rec
->tool
,
1369 process_synthesized_event
,
1374 err
= perf_event__synthesize_thread_map2(&rec
->tool
, rec
->evlist
->core
.threads
,
1375 process_synthesized_event
,
1378 pr_err("Couldn't synthesize thread map.\n");
1382 err
= perf_event__synthesize_cpu_map(&rec
->tool
, rec
->evlist
->core
.cpus
,
1383 process_synthesized_event
, NULL
);
1385 pr_err("Couldn't synthesize cpu map.\n");
1389 err
= perf_event__synthesize_bpf_events(session
, process_synthesized_event
,
1392 pr_warning("Couldn't synthesize bpf events.\n");
1394 err
= __machine__synthesize_threads(machine
, tool
, &opts
->target
, rec
->evlist
->core
.threads
,
1395 process_synthesized_event
, opts
->sample_address
,
1401 static int __cmd_record(struct record
*rec
, int argc
, const char **argv
)
1405 unsigned long waking
= 0;
1406 const bool forks
= argc
> 0;
1407 struct perf_tool
*tool
= &rec
->tool
;
1408 struct record_opts
*opts
= &rec
->opts
;
1409 struct perf_data
*data
= &rec
->data
;
1410 struct perf_session
*session
;
1411 bool disabled
= false, draining
= false;
1412 struct evlist
*sb_evlist
= NULL
;
1416 atexit(record__sig_exit
);
1417 signal(SIGCHLD
, sig_handler
);
1418 signal(SIGINT
, sig_handler
);
1419 signal(SIGTERM
, sig_handler
);
1420 signal(SIGSEGV
, sigsegv_handler
);
1422 if (rec
->opts
.record_namespaces
)
1423 tool
->namespace_events
= true;
1425 if (rec
->opts
.auxtrace_snapshot_mode
|| rec
->switch_output
.enabled
) {
1426 signal(SIGUSR2
, snapshot_sig_handler
);
1427 if (rec
->opts
.auxtrace_snapshot_mode
)
1428 trigger_on(&auxtrace_snapshot_trigger
);
1429 if (rec
->switch_output
.enabled
)
1430 trigger_on(&switch_output_trigger
);
1432 signal(SIGUSR2
, SIG_IGN
);
1435 session
= perf_session__new(data
, false, tool
);
1436 if (IS_ERR(session
)) {
1437 pr_err("Perf session creation failed.\n");
1438 return PTR_ERR(session
);
1441 fd
= perf_data__fd(data
);
1442 rec
->session
= session
;
1444 if (zstd_init(&session
->zstd_data
, rec
->opts
.comp_level
) < 0) {
1445 pr_err("Compression initialization failed.\n");
1449 session
->header
.env
.comp_type
= PERF_COMP_ZSTD
;
1450 session
->header
.env
.comp_level
= rec
->opts
.comp_level
;
1452 if (rec
->opts
.kcore
&&
1453 !record__kcore_readable(&session
->machines
.host
)) {
1454 pr_err("ERROR: kcore is not readable.\n");
1458 record__init_features(rec
);
1460 if (rec
->opts
.use_clockid
&& rec
->opts
.clockid_res_ns
)
1461 session
->header
.env
.clockid_res_ns
= rec
->opts
.clockid_res_ns
;
1464 err
= perf_evlist__prepare_workload(rec
->evlist
, &opts
->target
,
1465 argv
, data
->is_pipe
,
1466 workload_exec_failed_signal
);
1468 pr_err("Couldn't run the workload!\n");
1470 goto out_delete_session
;
1475 * If we have just single event and are sending data
1476 * through pipe, we need to force the ids allocation,
1477 * because we synthesize event name through the pipe
1478 * and need the id for that.
1480 if (data
->is_pipe
&& rec
->evlist
->core
.nr_entries
== 1)
1481 rec
->opts
.sample_id
= true;
1483 if (record__open(rec
) != 0) {
1487 session
->header
.env
.comp_mmap_len
= session
->evlist
->core
.mmap_len
;
1489 if (rec
->opts
.kcore
) {
1490 err
= record__kcore_copy(&session
->machines
.host
, data
);
1492 pr_err("ERROR: Failed to copy kcore\n");
1497 err
= bpf__apply_obj_config();
1499 char errbuf
[BUFSIZ
];
1501 bpf__strerror_apply_obj_config(err
, errbuf
, sizeof(errbuf
));
1502 pr_err("ERROR: Apply config to BPF failed: %s\n",
1508 * Normally perf_session__new would do this, but it doesn't have the
1511 if (rec
->tool
.ordered_events
&& !perf_evlist__sample_id_all(rec
->evlist
)) {
1512 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1513 rec
->tool
.ordered_events
= false;
1516 if (!rec
->evlist
->nr_groups
)
1517 perf_header__clear_feat(&session
->header
, HEADER_GROUP_DESC
);
1519 if (data
->is_pipe
) {
1520 err
= perf_header__write_pipe(fd
);
1524 err
= perf_session__write_header(session
, rec
->evlist
, fd
, false);
1529 if (!rec
->no_buildid
1530 && !perf_header__has_feat(&session
->header
, HEADER_BUILD_ID
)) {
1531 pr_err("Couldn't generate buildids. "
1532 "Use --no-buildid to profile anyway.\n");
1537 if (!opts
->no_bpf_event
)
1538 bpf_event__add_sb_event(&sb_evlist
, &session
->header
.env
);
1540 if (perf_evlist__start_sb_thread(sb_evlist
, &rec
->opts
.target
)) {
1541 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1542 opts
->no_bpf_event
= true;
1545 err
= record__synthesize(rec
, false);
1549 if (rec
->realtime_prio
) {
1550 struct sched_param param
;
1552 param
.sched_priority
= rec
->realtime_prio
;
1553 if (sched_setscheduler(0, SCHED_FIFO
, ¶m
)) {
1554 pr_err("Could not set realtime priority.\n");
1561 * When perf is starting the traced process, all the events
1562 * (apart from group members) have enable_on_exec=1 set,
1563 * so don't spoil it by prematurely enabling them.
1565 if (!target__none(&opts
->target
) && !opts
->initial_delay
)
1566 evlist__enable(rec
->evlist
);
1572 struct machine
*machine
= &session
->machines
.host
;
1573 union perf_event
*event
;
1576 event
= malloc(sizeof(event
->comm
) + machine
->id_hdr_size
);
1577 if (event
== NULL
) {
1583 * Some H/W events are generated before COMM event
1584 * which is emitted during exec(), so perf script
1585 * cannot see a correct process name for those events.
1586 * Synthesize COMM event to prevent it.
1588 tgid
= perf_event__synthesize_comm(tool
, event
,
1589 rec
->evlist
->workload
.pid
,
1590 process_synthesized_event
,
1597 event
= malloc(sizeof(event
->namespaces
) +
1598 (NR_NAMESPACES
* sizeof(struct perf_ns_link_info
)) +
1599 machine
->id_hdr_size
);
1600 if (event
== NULL
) {
1606 * Synthesize NAMESPACES event for the command specified.
1608 perf_event__synthesize_namespaces(tool
, event
,
1609 rec
->evlist
->workload
.pid
,
1610 tgid
, process_synthesized_event
,
1614 perf_evlist__start_workload(rec
->evlist
);
1617 if (opts
->initial_delay
) {
1618 usleep(opts
->initial_delay
* USEC_PER_MSEC
);
1619 evlist__enable(rec
->evlist
);
1622 trigger_ready(&auxtrace_snapshot_trigger
);
1623 trigger_ready(&switch_output_trigger
);
1624 perf_hooks__invoke_record_start();
1626 unsigned long long hits
= rec
->samples
;
1629 * rec->evlist->bkw_mmap_state is possible to be
1630 * BKW_MMAP_EMPTY here: when done == true and
1631 * hits != rec->samples in previous round.
1633 * perf_evlist__toggle_bkw_mmap ensure we never
1634 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1636 if (trigger_is_hit(&switch_output_trigger
) || done
|| draining
)
1637 perf_evlist__toggle_bkw_mmap(rec
->evlist
, BKW_MMAP_DATA_PENDING
);
1639 if (record__mmap_read_all(rec
, false) < 0) {
1640 trigger_error(&auxtrace_snapshot_trigger
);
1641 trigger_error(&switch_output_trigger
);
1646 if (auxtrace_record__snapshot_started
) {
1647 auxtrace_record__snapshot_started
= 0;
1648 if (!trigger_is_error(&auxtrace_snapshot_trigger
))
1649 record__read_auxtrace_snapshot(rec
, false);
1650 if (trigger_is_error(&auxtrace_snapshot_trigger
)) {
1651 pr_err("AUX area tracing snapshot failed\n");
1657 if (trigger_is_hit(&switch_output_trigger
)) {
1659 * If switch_output_trigger is hit, the data in
1660 * overwritable ring buffer should have been collected,
1661 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1663 * If SIGUSR2 raise after or during record__mmap_read_all(),
1664 * record__mmap_read_all() didn't collect data from
1665 * overwritable ring buffer. Read again.
1667 if (rec
->evlist
->bkw_mmap_state
== BKW_MMAP_RUNNING
)
1669 trigger_ready(&switch_output_trigger
);
1672 * Reenable events in overwrite ring buffer after
1673 * record__mmap_read_all(): we should have collected
1676 perf_evlist__toggle_bkw_mmap(rec
->evlist
, BKW_MMAP_RUNNING
);
1679 fprintf(stderr
, "[ perf record: dump data: Woken up %ld times ]\n",
1682 fd
= record__switch_output(rec
, false);
1684 pr_err("Failed to switch to new file\n");
1685 trigger_error(&switch_output_trigger
);
1690 /* re-arm the alarm */
1691 if (rec
->switch_output
.time
)
1692 alarm(rec
->switch_output
.time
);
1695 if (hits
== rec
->samples
) {
1696 if (done
|| draining
)
1698 err
= evlist__poll(rec
->evlist
, -1);
1700 * Propagate error, only if there's any. Ignore positive
1701 * number of returned events and interrupt error.
1703 if (err
> 0 || (err
< 0 && errno
== EINTR
))
1707 if (evlist__filter_pollfd(rec
->evlist
, POLLERR
| POLLHUP
) == 0)
1712 * When perf is starting the traced process, at the end events
1713 * die with the process and we wait for that. Thus no need to
1714 * disable events in this case.
1716 if (done
&& !disabled
&& !target__none(&opts
->target
)) {
1717 trigger_off(&auxtrace_snapshot_trigger
);
1718 evlist__disable(rec
->evlist
);
1723 trigger_off(&auxtrace_snapshot_trigger
);
1724 trigger_off(&switch_output_trigger
);
1726 if (opts
->auxtrace_snapshot_on_exit
)
1727 record__auxtrace_snapshot_exit(rec
);
1729 if (forks
&& workload_exec_errno
) {
1730 char msg
[STRERR_BUFSIZE
];
1731 const char *emsg
= str_error_r(workload_exec_errno
, msg
, sizeof(msg
));
1732 pr_err("Workload failed: %s\n", emsg
);
1738 fprintf(stderr
, "[ perf record: Woken up %ld times to write data ]\n", waking
);
1740 if (target__none(&rec
->opts
.target
))
1741 record__synthesize_workload(rec
, true);
1744 record__mmap_read_all(rec
, true);
1745 record__aio_mmap_read_sync(rec
);
1747 if (rec
->session
->bytes_transferred
&& rec
->session
->bytes_compressed
) {
1748 ratio
= (float)rec
->session
->bytes_transferred
/(float)rec
->session
->bytes_compressed
;
1749 session
->header
.env
.comp_ratio
= ratio
+ 0.5;
1755 if (!child_finished
)
1756 kill(rec
->evlist
->workload
.pid
, SIGTERM
);
1762 else if (WIFEXITED(exit_status
))
1763 status
= WEXITSTATUS(exit_status
);
1764 else if (WIFSIGNALED(exit_status
))
1765 signr
= WTERMSIG(exit_status
);
1769 record__synthesize(rec
, true);
1770 /* this will be recalculated during process_buildids() */
1774 if (!rec
->timestamp_filename
) {
1775 record__finish_output(rec
);
1777 fd
= record__switch_output(rec
, true);
1780 goto out_delete_session
;
1785 perf_hooks__invoke_record_end();
1787 if (!err
&& !quiet
) {
1789 const char *postfix
= rec
->timestamp_filename
?
1790 ".<timestamp>" : "";
1792 if (rec
->samples
&& !rec
->opts
.full_auxtrace
)
1793 scnprintf(samples
, sizeof(samples
),
1794 " (%" PRIu64
" samples)", rec
->samples
);
1798 fprintf(stderr
, "[ perf record: Captured and wrote %.3f MB %s%s%s",
1799 perf_data__size(data
) / 1024.0 / 1024.0,
1800 data
->path
, postfix
, samples
);
1802 fprintf(stderr
, ", compressed (original %.3f MB, ratio is %.3f)",
1803 rec
->session
->bytes_transferred
/ 1024.0 / 1024.0,
1806 fprintf(stderr
, " ]\n");
1810 zstd_fini(&session
->zstd_data
);
1811 perf_session__delete(session
);
1813 if (!opts
->no_bpf_event
)
1814 perf_evlist__stop_sb_thread(sb_evlist
);
1818 static void callchain_debug(struct callchain_param
*callchain
)
1820 static const char *str
[CALLCHAIN_MAX
] = { "NONE", "FP", "DWARF", "LBR" };
1822 pr_debug("callchain: type %s\n", str
[callchain
->record_mode
]);
1824 if (callchain
->record_mode
== CALLCHAIN_DWARF
)
1825 pr_debug("callchain: stack dump size %d\n",
1826 callchain
->dump_size
);
1829 int record_opts__parse_callchain(struct record_opts
*record
,
1830 struct callchain_param
*callchain
,
1831 const char *arg
, bool unset
)
1834 callchain
->enabled
= !unset
;
1836 /* --no-call-graph */
1838 callchain
->record_mode
= CALLCHAIN_NONE
;
1839 pr_debug("callchain: disabled\n");
1843 ret
= parse_callchain_record_opt(arg
, callchain
);
1845 /* Enable data address sampling for DWARF unwind. */
1846 if (callchain
->record_mode
== CALLCHAIN_DWARF
)
1847 record
->sample_address
= true;
1848 callchain_debug(callchain
);
1854 int record_parse_callchain_opt(const struct option
*opt
,
1858 return record_opts__parse_callchain(opt
->value
, &callchain_param
, arg
, unset
);
1861 int record_callchain_opt(const struct option
*opt
,
1862 const char *arg __maybe_unused
,
1863 int unset __maybe_unused
)
1865 struct callchain_param
*callchain
= opt
->value
;
1867 callchain
->enabled
= true;
1869 if (callchain
->record_mode
== CALLCHAIN_NONE
)
1870 callchain
->record_mode
= CALLCHAIN_FP
;
1872 callchain_debug(callchain
);
1876 static int perf_record_config(const char *var
, const char *value
, void *cb
)
1878 struct record
*rec
= cb
;
1880 if (!strcmp(var
, "record.build-id")) {
1881 if (!strcmp(value
, "cache"))
1882 rec
->no_buildid_cache
= false;
1883 else if (!strcmp(value
, "no-cache"))
1884 rec
->no_buildid_cache
= true;
1885 else if (!strcmp(value
, "skip"))
1886 rec
->no_buildid
= true;
1891 if (!strcmp(var
, "record.call-graph")) {
1892 var
= "call-graph.record-mode";
1893 return perf_default_config(var
, value
, cb
);
1895 #ifdef HAVE_AIO_SUPPORT
1896 if (!strcmp(var
, "record.aio")) {
1897 rec
->opts
.nr_cblocks
= strtol(value
, NULL
, 0);
1898 if (!rec
->opts
.nr_cblocks
)
1899 rec
->opts
.nr_cblocks
= nr_cblocks_default
;
/* One entry mapping a -k/--clockid name to its clockid_t value. */
struct clockid_map {
	const char	*name;
	int		clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif
1930 static const struct clockid_map clockids
[] = {
1931 /* available for all events, NMI safe */
1932 CLOCKID_MAP("monotonic", CLOCK_MONOTONIC
),
1933 CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW
),
1935 /* available for some events */
1936 CLOCKID_MAP("realtime", CLOCK_REALTIME
),
1937 CLOCKID_MAP("boottime", CLOCK_BOOTTIME
),
1938 CLOCKID_MAP("tai", CLOCK_TAI
),
1940 /* available for the lazy */
1941 CLOCKID_MAP("mono", CLOCK_MONOTONIC
),
1942 CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW
),
1943 CLOCKID_MAP("real", CLOCK_REALTIME
),
1944 CLOCKID_MAP("boot", CLOCK_BOOTTIME
),
1949 static int get_clockid_res(clockid_t clk_id
, u64
*res_ns
)
1951 struct timespec res
;
1954 if (!clock_getres(clk_id
, &res
))
1955 *res_ns
= res
.tv_nsec
+ res
.tv_sec
* NSEC_PER_SEC
;
1957 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1962 static int parse_clockid(const struct option
*opt
, const char *str
, int unset
)
1964 struct record_opts
*opts
= (struct record_opts
*)opt
->value
;
1965 const struct clockid_map
*cm
;
1966 const char *ostr
= str
;
1969 opts
->use_clockid
= 0;
1977 /* no setting it twice */
1978 if (opts
->use_clockid
)
1981 opts
->use_clockid
= true;
1983 /* if its a number, we're done */
1984 if (sscanf(str
, "%d", &opts
->clockid
) == 1)
1985 return get_clockid_res(opts
->clockid
, &opts
->clockid_res_ns
);
1987 /* allow a "CLOCK_" prefix to the name */
1988 if (!strncasecmp(str
, "CLOCK_", 6))
1991 for (cm
= clockids
; cm
->name
; cm
++) {
1992 if (!strcasecmp(str
, cm
->name
)) {
1993 opts
->clockid
= cm
->clockid
;
1994 return get_clockid_res(opts
->clockid
,
1995 &opts
->clockid_res_ns
);
1999 opts
->use_clockid
= false;
2000 ui__warning("unknown clockid %s, check man page\n", ostr
);
2004 static int record__parse_affinity(const struct option
*opt
, const char *str
, int unset
)
2006 struct record_opts
*opts
= (struct record_opts
*)opt
->value
;
2011 if (!strcasecmp(str
, "node"))
2012 opts
->affinity
= PERF_AFFINITY_NODE
;
2013 else if (!strcasecmp(str
, "cpu"))
2014 opts
->affinity
= PERF_AFFINITY_CPU
;
2019 static int parse_output_max_size(const struct option
*opt
,
2020 const char *str
, int unset
)
2022 unsigned long *s
= (unsigned long *)opt
->value
;
2023 static struct parse_tag tags_size
[] = {
2024 { .tag
= 'B', .mult
= 1 },
2025 { .tag
= 'K', .mult
= 1 << 10 },
2026 { .tag
= 'M', .mult
= 1 << 20 },
2027 { .tag
= 'G', .mult
= 1 << 30 },
2037 val
= parse_tag_value(str
, tags_size
);
2038 if (val
!= (unsigned long) -1) {
2046 static int record__parse_mmap_pages(const struct option
*opt
,
2048 int unset __maybe_unused
)
2050 struct record_opts
*opts
= opt
->value
;
2052 unsigned int mmap_pages
;
2067 ret
= __perf_evlist__parse_mmap_pages(&mmap_pages
, s
);
2070 opts
->mmap_pages
= mmap_pages
;
2078 ret
= __perf_evlist__parse_mmap_pages(&mmap_pages
, p
+ 1);
2082 opts
->auxtrace_mmap_pages
= mmap_pages
;
2089 static void switch_output_size_warn(struct record
*rec
)
2091 u64 wakeup_size
= evlist__mmap_size(rec
->opts
.mmap_pages
);
2092 struct switch_output
*s
= &rec
->switch_output
;
2096 if (s
->size
< wakeup_size
) {
2099 unit_number__scnprintf(buf
, sizeof(buf
), wakeup_size
);
2100 pr_warning("WARNING: switch-output data size lower than "
2101 "wakeup kernel buffer size (%s) "
2102 "expect bigger perf.data sizes\n", buf
);
2106 static int switch_output_setup(struct record
*rec
)
2108 struct switch_output
*s
= &rec
->switch_output
;
2109 static struct parse_tag tags_size
[] = {
2110 { .tag
= 'B', .mult
= 1 },
2111 { .tag
= 'K', .mult
= 1 << 10 },
2112 { .tag
= 'M', .mult
= 1 << 20 },
2113 { .tag
= 'G', .mult
= 1 << 30 },
2116 static struct parse_tag tags_time
[] = {
2117 { .tag
= 's', .mult
= 1 },
2118 { .tag
= 'm', .mult
= 60 },
2119 { .tag
= 'h', .mult
= 60*60 },
2120 { .tag
= 'd', .mult
= 60*60*24 },
2128 if (!strcmp(s
->str
, "signal")) {
2130 pr_debug("switch-output with SIGUSR2 signal\n");
2134 val
= parse_tag_value(s
->str
, tags_size
);
2135 if (val
!= (unsigned long) -1) {
2137 pr_debug("switch-output with %s size threshold\n", s
->str
);
2141 val
= parse_tag_value(s
->str
, tags_time
);
2142 if (val
!= (unsigned long) -1) {
2144 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2152 rec
->timestamp_filename
= true;
2155 if (s
->size
&& !rec
->opts
.no_buffering
)
2156 switch_output_size_warn(rec
);
2161 static const char * const __record_usage
[] = {
2162 "perf record [<options>] [<command>]",
2163 "perf record [<options>] -- <command> [<options>]",
2166 const char * const *record_usage
= __record_usage
;
2168 static int build_id__process_mmap(struct perf_tool
*tool
, union perf_event
*event
,
2169 struct perf_sample
*sample
, struct machine
*machine
)
2172 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
2173 * no need to add them twice.
2175 if (!(event
->header
.misc
& PERF_RECORD_MISC_USER
))
2177 return perf_event__process_mmap(tool
, event
, sample
, machine
);
2180 static int build_id__process_mmap2(struct perf_tool
*tool
, union perf_event
*event
,
2181 struct perf_sample
*sample
, struct machine
*machine
)
2184 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
2185 * no need to add them twice.
2187 if (!(event
->header
.misc
& PERF_RECORD_MISC_USER
))
2190 return perf_event__process_mmap2(tool
, event
, sample
, machine
);
2194 * XXX Ideally would be local to cmd_record() and passed to a record__new
2195 * because we need to have access to it in record__exit, that is called
2196 * after cmd_record() exits, but since record_options need to be accessible to
2197 * builtin-script, leave it here.
2199 * At least we don't ouch it in all the other functions here directly.
2201 * Just say no to tons of global variables, sigh.
2203 static struct record record
= {
2205 .sample_time
= true,
2206 .mmap_pages
= UINT_MAX
,
2207 .user_freq
= UINT_MAX
,
2208 .user_interval
= ULLONG_MAX
,
2212 .default_per_cpu
= true,
2214 .mmap_flush
= MMAP_FLUSH_DEFAULT
,
2217 .sample
= process_sample_event
,
2218 .fork
= perf_event__process_fork
,
2219 .exit
= perf_event__process_exit
,
2220 .comm
= perf_event__process_comm
,
2221 .namespaces
= perf_event__process_namespaces
,
2222 .mmap
= build_id__process_mmap
,
2223 .mmap2
= build_id__process_mmap2
,
2224 .ordered_events
= true,
2228 const char record_callchain_help
[] = CALLCHAIN_RECORD_HELP
2229 "\n\t\t\t\tDefault: fp";
2231 static bool dry_run
;
2234 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
2235 * with it and switch to use the library functions in perf_evlist that came
2236 * from builtin-record.c, i.e. use record_opts,
2237 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
2240 static struct option __record_options
[] = {
2241 OPT_CALLBACK('e', "event", &record
.evlist
, "event",
2242 "event selector. use 'perf list' to list available events",
2243 parse_events_option
),
2244 OPT_CALLBACK(0, "filter", &record
.evlist
, "filter",
2245 "event filter", parse_filter
),
2246 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record
.evlist
,
2247 NULL
, "don't record events from perf itself",
2249 OPT_STRING('p', "pid", &record
.opts
.target
.pid
, "pid",
2250 "record events on existing process id"),
2251 OPT_STRING('t', "tid", &record
.opts
.target
.tid
, "tid",
2252 "record events on existing thread id"),
2253 OPT_INTEGER('r', "realtime", &record
.realtime_prio
,
2254 "collect data with this RT SCHED_FIFO priority"),
2255 OPT_BOOLEAN(0, "no-buffering", &record
.opts
.no_buffering
,
2256 "collect data without buffering"),
2257 OPT_BOOLEAN('R', "raw-samples", &record
.opts
.raw_samples
,
2258 "collect raw sample records from all opened counters"),
2259 OPT_BOOLEAN('a', "all-cpus", &record
.opts
.target
.system_wide
,
2260 "system-wide collection from all CPUs"),
2261 OPT_STRING('C', "cpu", &record
.opts
.target
.cpu_list
, "cpu",
2262 "list of cpus to monitor"),
2263 OPT_U64('c', "count", &record
.opts
.user_interval
, "event period to sample"),
2264 OPT_STRING('o', "output", &record
.data
.path
, "file",
2265 "output file name"),
2266 OPT_BOOLEAN_SET('i', "no-inherit", &record
.opts
.no_inherit
,
2267 &record
.opts
.no_inherit_set
,
2268 "child tasks do not inherit counters"),
2269 OPT_BOOLEAN(0, "tail-synthesize", &record
.opts
.tail_synthesize
,
2270 "synthesize non-sample events at the end of output"),
2271 OPT_BOOLEAN(0, "overwrite", &record
.opts
.overwrite
, "use overwrite mode"),
2272 OPT_BOOLEAN(0, "no-bpf-event", &record
.opts
.no_bpf_event
, "record bpf events"),
2273 OPT_BOOLEAN(0, "strict-freq", &record
.opts
.strict_freq
,
2274 "Fail if the specified frequency can't be used"),
2275 OPT_CALLBACK('F', "freq", &record
.opts
, "freq or 'max'",
2276 "profile at this frequency",
2277 record__parse_freq
),
2278 OPT_CALLBACK('m', "mmap-pages", &record
.opts
, "pages[,pages]",
2279 "number of mmap data pages and AUX area tracing mmap pages",
2280 record__parse_mmap_pages
),
2281 OPT_CALLBACK(0, "mmap-flush", &record
.opts
, "number",
2282 "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
2283 record__mmap_flush_parse
),
2284 OPT_BOOLEAN(0, "group", &record
.opts
.group
,
2285 "put the counters into a counter group"),
2286 OPT_CALLBACK_NOOPT('g', NULL
, &callchain_param
,
2287 NULL
, "enables call-graph recording" ,
2288 &record_callchain_opt
),
2289 OPT_CALLBACK(0, "call-graph", &record
.opts
,
2290 "record_mode[,record_size]", record_callchain_help
,
2291 &record_parse_callchain_opt
),
2292 OPT_INCR('v', "verbose", &verbose
,
2293 "be more verbose (show counter open errors, etc)"),
2294 OPT_BOOLEAN('q', "quiet", &quiet
, "don't print any message"),
2295 OPT_BOOLEAN('s', "stat", &record
.opts
.inherit_stat
,
2296 "per thread counts"),
2297 OPT_BOOLEAN('d', "data", &record
.opts
.sample_address
, "Record the sample addresses"),
2298 OPT_BOOLEAN(0, "phys-data", &record
.opts
.sample_phys_addr
,
2299 "Record the sample physical addresses"),
2300 OPT_BOOLEAN(0, "sample-cpu", &record
.opts
.sample_cpu
, "Record the sample cpu"),
2301 OPT_BOOLEAN_SET('T', "timestamp", &record
.opts
.sample_time
,
2302 &record
.opts
.sample_time_set
,
2303 "Record the sample timestamps"),
2304 OPT_BOOLEAN_SET('P', "period", &record
.opts
.period
, &record
.opts
.period_set
,
2305 "Record the sample period"),
2306 OPT_BOOLEAN('n', "no-samples", &record
.opts
.no_samples
,
2308 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record
.no_buildid_cache
,
2309 &record
.no_buildid_cache_set
,
2310 "do not update the buildid cache"),
2311 OPT_BOOLEAN_SET('B', "no-buildid", &record
.no_buildid
,
2312 &record
.no_buildid_set
,
2313 "do not collect buildids in perf.data"),
2314 OPT_CALLBACK('G', "cgroup", &record
.evlist
, "name",
2315 "monitor event in cgroup name only",
2317 OPT_UINTEGER('D', "delay", &record
.opts
.initial_delay
,
2318 "ms to wait before starting measurement after program start"),
2319 OPT_BOOLEAN(0, "kcore", &record
.opts
.kcore
, "copy /proc/kcore"),
2320 OPT_STRING('u', "uid", &record
.opts
.target
.uid_str
, "user",
2323 OPT_CALLBACK_NOOPT('b', "branch-any", &record
.opts
.branch_stack
,
2324 "branch any", "sample any taken branches",
2325 parse_branch_stack
),
2327 OPT_CALLBACK('j', "branch-filter", &record
.opts
.branch_stack
,
2328 "branch filter mask", "branch stack filter modes",
2329 parse_branch_stack
),
2330 OPT_BOOLEAN('W', "weight", &record
.opts
.sample_weight
,
2331 "sample by weight (on special events only)"),
2332 OPT_BOOLEAN(0, "transaction", &record
.opts
.sample_transaction
,
2333 "sample transaction flags (special events only)"),
2334 OPT_BOOLEAN(0, "per-thread", &record
.opts
.target
.per_thread
,
2335 "use per-thread mmaps"),
2336 OPT_CALLBACK_OPTARG('I', "intr-regs", &record
.opts
.sample_intr_regs
, NULL
, "any register",
2337 "sample selected machine registers on interrupt,"
2338 " use '-I?' to list register names", parse_intr_regs
),
2339 OPT_CALLBACK_OPTARG(0, "user-regs", &record
.opts
.sample_user_regs
, NULL
, "any register",
2340 "sample selected machine registers on interrupt,"
2341 " use '--user-regs=?' to list register names", parse_user_regs
),
2342 OPT_BOOLEAN(0, "running-time", &record
.opts
.running_time
,
2343 "Record running/enabled time of read (:S) events"),
2344 OPT_CALLBACK('k', "clockid", &record
.opts
,
2345 "clockid", "clockid to use for events, see clock_gettime()",
2347 OPT_STRING_OPTARG('S', "snapshot", &record
.opts
.auxtrace_snapshot_opts
,
2348 "opts", "AUX area tracing Snapshot Mode", ""),
2349 OPT_STRING_OPTARG(0, "aux-sample", &record
.opts
.auxtrace_sample_opts
,
2350 "opts", "sample AUX area", ""),
2351 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout
,
2352 "per thread proc mmap processing timeout in ms"),
2353 OPT_BOOLEAN(0, "namespaces", &record
.opts
.record_namespaces
,
2354 "Record namespaces events"),
2355 OPT_BOOLEAN(0, "switch-events", &record
.opts
.record_switch_events
,
2356 "Record context switch events"),
2357 OPT_BOOLEAN_FLAG(0, "all-kernel", &record
.opts
.all_kernel
,
2358 "Configure all used events to run in kernel space.",
2359 PARSE_OPT_EXCLUSIVE
),
2360 OPT_BOOLEAN_FLAG(0, "all-user", &record
.opts
.all_user
,
2361 "Configure all used events to run in user space.",
2362 PARSE_OPT_EXCLUSIVE
),
2363 OPT_BOOLEAN(0, "kernel-callchains", &record
.opts
.kernel_callchains
,
2364 "collect kernel callchains"),
2365 OPT_BOOLEAN(0, "user-callchains", &record
.opts
.user_callchains
,
2366 "collect user callchains"),
2367 OPT_STRING(0, "clang-path", &llvm_param
.clang_path
, "clang path",
2368 "clang binary to use for compiling BPF scriptlets"),
2369 OPT_STRING(0, "clang-opt", &llvm_param
.clang_opt
, "clang options",
2370 "options passed to clang when compiling BPF scriptlets"),
2371 OPT_STRING(0, "vmlinux", &symbol_conf
.vmlinux_name
,
2372 "file", "vmlinux pathname"),
2373 OPT_BOOLEAN(0, "buildid-all", &record
.buildid_all
,
2374 "Record build-id of all DSOs regardless of hits"),
2375 OPT_BOOLEAN(0, "timestamp-filename", &record
.timestamp_filename
,
2376 "append timestamp to output filename"),
2377 OPT_BOOLEAN(0, "timestamp-boundary", &record
.timestamp_boundary
,
2378 "Record timestamp boundary (time of first/last samples)"),
2379 OPT_STRING_OPTARG_SET(0, "switch-output", &record
.switch_output
.str
,
2380 &record
.switch_output
.set
, "signal or size[BKMG] or time[smhd]",
2381 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
2383 OPT_INTEGER(0, "switch-max-files", &record
.switch_output
.num_files
,
2384 "Limit number of switch output generated files"),
2385 OPT_BOOLEAN(0, "dry-run", &dry_run
,
2386 "Parse options then exit"),
2387 #ifdef HAVE_AIO_SUPPORT
2388 OPT_CALLBACK_OPTARG(0, "aio", &record
.opts
,
2389 &nr_cblocks_default
, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2392 OPT_CALLBACK(0, "affinity", &record
.opts
, "node|cpu",
2393 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2394 record__parse_affinity
),
2395 #ifdef HAVE_ZSTD_SUPPORT
2396 OPT_CALLBACK_OPTARG('z', "compression-level", &record
.opts
, &comp_level_default
,
2397 "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
2398 record__parse_comp_level
),
2400 OPT_CALLBACK(0, "max-size", &record
.output_max_size
,
2401 "size", "Limit the maximum size of the output file", parse_output_max_size
),
2405 struct option
*record_options
= __record_options
;
2407 int cmd_record(int argc
, const char **argv
)
2410 struct record
*rec
= &record
;
2411 char errbuf
[BUFSIZ
];
2413 setlocale(LC_ALL
, "");
2415 #ifndef HAVE_LIBBPF_SUPPORT
2416 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2417 set_nobuild('\0', "clang-path", true);
2418 set_nobuild('\0', "clang-opt", true);
2422 #ifndef HAVE_BPF_PROLOGUE
2423 # if !defined (HAVE_DWARF_SUPPORT)
2424 # define REASON "NO_DWARF=1"
2425 # elif !defined (HAVE_LIBBPF_SUPPORT)
2426 # define REASON "NO_LIBBPF=1"
2428 # define REASON "this architecture doesn't support BPF prologue"
2430 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2431 set_nobuild('\0', "vmlinux", true);
2436 CPU_ZERO(&rec
->affinity_mask
);
2437 rec
->opts
.affinity
= PERF_AFFINITY_SYS
;
2439 rec
->evlist
= evlist__new();
2440 if (rec
->evlist
== NULL
)
2443 err
= perf_config(perf_record_config
, rec
);
2447 argc
= parse_options(argc
, argv
, record_options
, record_usage
,
2448 PARSE_OPT_STOP_AT_NON_OPTION
);
2450 perf_quiet_option();
2452 /* Make system wide (-a) the default target. */
2453 if (!argc
&& target__none(&rec
->opts
.target
))
2454 rec
->opts
.target
.system_wide
= true;
2456 if (nr_cgroups
&& !rec
->opts
.target
.system_wide
) {
2457 usage_with_options_msg(record_usage
, record_options
,
2458 "cgroup monitoring only available in system-wide mode");
2462 if (rec
->opts
.kcore
)
2463 rec
->data
.is_dir
= true;
2465 if (rec
->opts
.comp_level
!= 0) {
2466 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
2467 rec
->no_buildid
= true;
2470 if (rec
->opts
.record_switch_events
&&
2471 !perf_can_record_switch_events()) {
2472 ui__error("kernel does not support recording context switch events\n");
2473 parse_options_usage(record_usage
, record_options
, "switch-events", 0);
2477 if (switch_output_setup(rec
)) {
2478 parse_options_usage(record_usage
, record_options
, "switch-output", 0);
2482 if (rec
->switch_output
.time
) {
2483 signal(SIGALRM
, alarm_sig_handler
);
2484 alarm(rec
->switch_output
.time
);
2487 if (rec
->switch_output
.num_files
) {
2488 rec
->switch_output
.filenames
= calloc(sizeof(char *),
2489 rec
->switch_output
.num_files
);
2490 if (!rec
->switch_output
.filenames
)
2495 * Allow aliases to facilitate the lookup of symbols for address
2496 * filters. Refer to auxtrace_parse_filters().
2498 symbol_conf
.allow_aliases
= true;
2502 err
= record__auxtrace_init(rec
);
2509 err
= bpf__setup_stdout(rec
->evlist
);
2511 bpf__strerror_setup_stdout(rec
->evlist
, err
, errbuf
, sizeof(errbuf
));
2512 pr_err("ERROR: Setup BPF stdout failed: %s\n",
2519 if (rec
->no_buildid_cache
|| rec
->no_buildid
) {
2520 disable_buildid_cache();
2521 } else if (rec
->switch_output
.enabled
) {
2523 * In 'perf record --switch-output', disable buildid
2524 * generation by default to reduce data file switching
2525 * overhead. Still generate buildid if they are required
2528 * perf record --switch-output --no-no-buildid \
2529 * --no-no-buildid-cache
2531 * Following code equals to:
2533 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2534 * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2535 * disable_buildid_cache();
2537 bool disable
= true;
2539 if (rec
->no_buildid_set
&& !rec
->no_buildid
)
2541 if (rec
->no_buildid_cache_set
&& !rec
->no_buildid_cache
)
2544 rec
->no_buildid
= true;
2545 rec
->no_buildid_cache
= true;
2546 disable_buildid_cache();
2550 if (record
.opts
.overwrite
)
2551 record
.opts
.tail_synthesize
= true;
2553 if (rec
->evlist
->core
.nr_entries
== 0 &&
2554 __perf_evlist__add_default(rec
->evlist
, !record
.opts
.no_samples
) < 0) {
2555 pr_err("Not enough memory for event selector list\n");
2559 if (rec
->opts
.target
.tid
&& !rec
->opts
.no_inherit_set
)
2560 rec
->opts
.no_inherit
= true;
2562 err
= target__validate(&rec
->opts
.target
);
2564 target__strerror(&rec
->opts
.target
, err
, errbuf
, BUFSIZ
);
2565 ui__warning("%s\n", errbuf
);
2568 err
= target__parse_uid(&rec
->opts
.target
);
2570 int saved_errno
= errno
;
2572 target__strerror(&rec
->opts
.target
, err
, errbuf
, BUFSIZ
);
2573 ui__error("%s", errbuf
);
2579 /* Enable ignoring missing threads when -u/-p option is defined. */
2580 rec
->opts
.ignore_missing_thread
= rec
->opts
.target
.uid
!= UINT_MAX
|| rec
->opts
.target
.pid
;
2583 if (perf_evlist__create_maps(rec
->evlist
, &rec
->opts
.target
) < 0)
2584 usage_with_options(record_usage
, record_options
);
2586 err
= auxtrace_record__options(rec
->itr
, rec
->evlist
, &rec
->opts
);
2591 * We take all buildids when the file contains
2592 * AUX area tracing data because we do not decode the
2593 * trace because it would take too long.
2595 if (rec
->opts
.full_auxtrace
)
2596 rec
->buildid_all
= true;
2598 if (record_opts__config(&rec
->opts
)) {
2603 if (rec
->opts
.nr_cblocks
> nr_cblocks_max
)
2604 rec
->opts
.nr_cblocks
= nr_cblocks_max
;
2605 pr_debug("nr_cblocks: %d\n", rec
->opts
.nr_cblocks
);
2607 pr_debug("affinity: %s\n", affinity_tags
[rec
->opts
.affinity
]);
2608 pr_debug("mmap flush: %d\n", rec
->opts
.mmap_flush
);
2610 if (rec
->opts
.comp_level
> comp_level_max
)
2611 rec
->opts
.comp_level
= comp_level_max
;
2612 pr_debug("comp level: %d\n", rec
->opts
.comp_level
);
2614 err
= __cmd_record(&record
, argc
, argv
);
2616 evlist__delete(rec
->evlist
);
2618 auxtrace_record__free(rec
->itr
);
2622 static void snapshot_sig_handler(int sig __maybe_unused
)
2624 struct record
*rec
= &record
;
2626 if (trigger_is_ready(&auxtrace_snapshot_trigger
)) {
2627 trigger_hit(&auxtrace_snapshot_trigger
);
2628 auxtrace_record__snapshot_started
= 1;
2629 if (auxtrace_record__snapshot_start(record
.itr
))
2630 trigger_error(&auxtrace_snapshot_trigger
);
2633 if (switch_output_signal(rec
))
2634 trigger_hit(&switch_output_trigger
);
2637 static void alarm_sig_handler(int sig __maybe_unused
)
2639 struct record
*rec
= &record
;
2641 if (switch_output_time(rec
))
2642 trigger_hit(&switch_output_trigger
);