]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - tools/perf/builtin-record.c
perf session: Define 'bytes_transferred' and 'bytes_compressed' metrics
[mirror_ubuntu-hirsute-kernel.git] / tools / perf / builtin-record.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
abaff32a 2/*
bf9e1876
IM
3 * builtin-record.c
4 *
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
abaff32a 8 */
16f762a2 9#include "builtin.h"
bf9e1876
IM
10
11#include "perf.h"
12
6122e4e4 13#include "util/build-id.h"
6eda5838 14#include "util/util.h"
4b6ab94e 15#include <subcmd/parse-options.h>
8ad8db37 16#include "util/parse-events.h"
41840d21 17#include "util/config.h"
6eda5838 18
8f651eae 19#include "util/callchain.h"
f14d5707 20#include "util/cgroup.h"
7c6a1c65 21#include "util/header.h"
66e274f3 22#include "util/event.h"
361c99a6 23#include "util/evlist.h"
69aad6f1 24#include "util/evsel.h"
8f28827a 25#include "util/debug.h"
94c744b6 26#include "util/session.h"
45694aa7 27#include "util/tool.h"
8d06367f 28#include "util/symbol.h"
a12b51c4 29#include "util/cpumap.h"
fd78260b 30#include "util/thread_map.h"
f5fc1412 31#include "util/data.h"
bcc84ec6 32#include "util/perf_regs.h"
ef149c25 33#include "util/auxtrace.h"
46bc29b9 34#include "util/tsc.h"
f00898f4 35#include "util/parse-branch-options.h"
bcc84ec6 36#include "util/parse-regs-options.h"
71dc2326 37#include "util/llvm-utils.h"
8690a2a7 38#include "util/bpf-loader.h"
5f9cf599 39#include "util/trigger.h"
a074865e 40#include "util/perf-hooks.h"
f13de660 41#include "util/cpu-set-sched.h"
c5e4027e 42#include "util/time-utils.h"
58db1d6e 43#include "util/units.h"
7b612e29 44#include "util/bpf-event.h"
d8871ea7 45#include "asm/bug.h"
7c6a1c65 46
a43783ae 47#include <errno.h>
fd20e811 48#include <inttypes.h>
67230479 49#include <locale.h>
4208735d 50#include <poll.h>
97124d5e 51#include <unistd.h>
de9ac07b 52#include <sched.h>
9607ad3a 53#include <signal.h>
a41794cd 54#include <sys/mman.h>
4208735d 55#include <sys/wait.h>
0693e680 56#include <linux/time64.h>
78da39fa 57
/*
 * State for 'perf record --switch-output': controls when and how the
 * output file is rotated (on SIGUSR2, after a byte-size threshold, or
 * after a time interval), plus the ring of generated file names.
 */
struct switch_output {
	bool		 enabled;
	bool		 signal;	/* rotate on SIGUSR2 */
	unsigned long	 size;		/* rotate after this many bytes written */
	unsigned long	 time;		/* rotate after this time interval */
	const char	*str;		/* raw --switch-output option argument */
	bool		 set;		/* option was given on the command line */
	char		**filenames;	/* ring of kept output file names */
	int		 num_files;	/* max files kept in the ring */
	int		 cur_file;	/* index of most recent file in ring */
};
69
/*
 * Top-level state of one 'perf record' session, embedding the perf_tool
 * callbacks so tool pointers can be mapped back via container_of().
 */
struct record {
	struct perf_tool	tool;		/* event-processing callbacks */
	struct record_opts	opts;
	u64			bytes_written;	/* payload bytes written so far */
	struct perf_data	data;		/* output perf.data handle */
	struct auxtrace_record	*itr;		/* AUX area tracing state, if any */
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;	/* mark all DSOs regardless of hits */
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;	/* --switch-output state */
	unsigned long long	samples;
	cpu_set_t		affinity_mask;	/* last mask set by record__adjust_affinity() */
};
a21ca2ca 90
dc0c6127
JO
/* Set asynchronously from signal context; polled by the record loop. */
static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

/* Printable names for the perf_affinity enum values — presumably shown
 * for the --affinity option; verify against enum order in the header. */
static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};
98
dc0c6127
JO
99static bool switch_output_signal(struct record *rec)
100{
101 return rec->switch_output.signal &&
102 trigger_is_ready(&switch_output_trigger);
103}
104
105static bool switch_output_size(struct record *rec)
106{
107 return rec->switch_output.size &&
108 trigger_is_ready(&switch_output_trigger) &&
109 (rec->bytes_written >= rec->switch_output.size);
110}
111
bfacbe3b
JO
112static bool switch_output_time(struct record *rec)
113{
114 return rec->switch_output.time &&
115 trigger_is_ready(&switch_output_trigger);
116}
117
ded2b8fe
JO
/*
 * Write @size bytes at @bf to the output file and account them in
 * rec->bytes_written; fires the switch-output trigger once the size
 * threshold is crossed.  @map is unused here — the parameter keeps the
 * signature compatible with the mmap push callbacks.
 * Returns 0 on success, -1 on write failure.
 */
static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}
135
d3d1af6f
AB
136#ifdef HAVE_AIO_SUPPORT
/*
 * Queue one POSIX AIO write of @size bytes from @buf at file offset @off.
 * Retries while the AIO queue is full (EAGAIN); on any other error the
 * control block is invalidated (aio_fildes = -1) so callers treat it as
 * free.  Returns aio_write()'s result: 0 when queued, -1 on failure.
 */
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	for (;;) {
		rc = aio_write(cblock);
		if (rc == 0)
			break;
		if (errno != EAGAIN) {
			/* Hard failure: mark the control block unused. */
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	}

	return rc;
}
161
/*
 * Poll one in-flight aio write control block.
 * Returns 1 when the request fully completed (the mmap reference taken
 * for it in perf_mmap__push() is dropped and the block is freed), or 0
 * when it is still in progress or was restarted with the unwritten
 * remainder of a short write.
 */
static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		/* Treat a failed write as zero progress so it is retried. */
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in perf_mmap__push() for
		 * every enqueued aio write request so decrement it because
		 * the request is now complete.
		 */
		perf_mmap__put(md);
		rc = 1;
	} else {
		/*
		 * aio write request may require restart with the
		 * reminder if the kernel didn't write whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}
207
/*
 * Wait for aio writes on @md.  With @sync_all false, return the index of
 * the first free control block (blocking in aio_suspend() until one
 * completes).  With @sync_all true, loop until every request has
 * completed and then return -1.
 */
static int record__aio_sync(struct perf_mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			/* aio_fildes == -1 marks a free/completed block. */
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * Started aio write is not complete yet
				 * so it has to be waited before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}
242
/*
 * Push callback for aio mode: queue an async write of @size bytes at
 * offset @off and, if successfully queued, account the bytes and arm the
 * switch-output trigger when the size threshold is crossed.
 * Returns record__aio_write()'s result (0 queued, -1 failure).
 */
static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
{
	struct record *rec = to;
	int ret, trace_fd = rec->session->data->file.fd;

	rec->samples++;

	ret = record__aio_write(cblock, trace_fd, bf, size, off);
	if (!ret) {
		rec->bytes_written += size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	}

	return ret;
}
259
/* Current write position (file offset) of the trace file. */
static off_t record__aio_get_pos(int trace_fd)
{
	off_t pos = lseek(trace_fd, 0, SEEK_CUR);

	return pos;
}
264
/* Reposition the trace file's write offset to @pos. */
static void record__aio_set_pos(int trace_fd, off_t pos)
{
	(void)lseek(trace_fd, pos, SEEK_SET);
}
269
/*
 * Drain: wait for completion of all outstanding aio writes on every
 * mapped ring buffer.  No-op unless aio mode (--aio) is enabled.
 */
static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_mmap *maps = evlist->mmap;

	if (!rec->opts.nr_cblocks)
		return;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct perf_mmap *map = &maps[i];

		if (map->base)
			record__aio_sync(map, true);
	}
}
286
/* Default and maximum number of in-flight aio control blocks (--aio[=n]). */
static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;
d3d1af6f
AB
289
/*
 * Option callback for --aio[=n]: store the requested number of aio
 * control blocks in opts->nr_cblocks (0 disables aio; a bare --aio with
 * no argument, or an argument parsing to 0, selects the default).
 */
static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
307#else /* HAVE_AIO_SUPPORT */
/*
 * No-op fallbacks used when perf is built without POSIX AIO support
 * (HAVE_AIO_SUPPORT undefined); record__aio_enabled() is then always 0
 * so these should never be reached on the data path.
 */
static int nr_cblocks_max = 0;

static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
{
	return -1;
}

static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
		void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
333#endif
334
335static int record__aio_enabled(struct record *rec)
336{
337 return rec->opts.nr_cblocks > 0;
338}
339
470530bb
AB
#define MMAP_FLUSH_DEFAULT 1
/*
 * Option callback for --mmap-flush: parse a size with an optional
 * B/K/M/G suffix (falling back to a plain number), defaulting to
 * MMAP_FLUSH_DEFAULT, and clamp it to a quarter of the ring-buffer size.
 */
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag  = 'B', .mult = 1       },
			{ .tag  = 'K', .mult = 1 << 10 },
			{ .tag  = 'M', .mult = 1 << 20 },
			{ .tag  = 'G', .mult = 1 << 30 },
			{ .tag  = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		/* No recognized suffix: treat the argument as a raw number. */
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = perf_evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}
374
/*
 * Tool callback: write a synthesized event straight to the output file.
 * Sample and machine are unused — synthesized events are emitted as-is.
 */
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}
383
/*
 * Push callback for synchronous (non-aio) mmap reading: count the chunk
 * and write it to the output file via record__write().
 */
static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	rec->samples++;
	return record__write(rec, map, bf, size);
}
391
2dd6d8a1
AH
/* Flags set from signal context and polled by the main record loop. */
static volatile int done;
static volatile int signr = -1;		/* signal to re-raise at exit; -1 = none */
static volatile int child_finished;

/*
 * Common signal handler: SIGCHLD marks the forked workload as finished;
 * any other signal is remembered in signr for re-raising in
 * record__sig_exit().  Either way the main loop is told to stop.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}
405
a074865e
WN
/* SIGSEGV handler: run perf-hook recovery, then dump a stack trace. */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}
411
2dd6d8a1
AH
/*
 * At exit: if we stopped because of a signal, restore its default
 * disposition and re-raise it so the shell sees the real termination
 * status.
 */
static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}
420
e31f0d01
AH
421#ifdef HAVE_AUXTRACE_SUPPORT
422
/*
 * Write one AUX area trace event plus its data payload (possibly split
 * in two wrap-around pieces, @data1/@data2) to the output, padding the
 * payload to an 8-byte boundary.  For seekable output the event's file
 * offset is also added to the auxtrace index.
 * Returns 0 on success, negative on error.
 */
static int record__process_auxtrace(struct perf_tool *tool,
				    struct perf_mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}
460
/*
 * Drain one AUX area mmap in normal (non-snapshot) mode; a positive
 * auxtrace_mmap__read() result means data was emitted, counted as a
 * sample.  Returns 0 on success, negative on error.
 */
static int record__auxtrace_mmap_read(struct record *rec,
				      struct perf_mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}
476
/*
 * Snapshot-mode variant of record__auxtrace_mmap_read(): capture at most
 * auxtrace_snapshot_size bytes from the AUX area mmap.
 * Returns 0 on success, negative on error.
 */
static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct perf_mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}
493
494static int record__auxtrace_read_snapshot_all(struct record *rec)
495{
496 int i;
497 int rc = 0;
498
499 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
e035f4ca 500 struct perf_mmap *map = &rec->evlist->mmap[i];
2dd6d8a1 501
e035f4ca 502 if (!map->auxtrace_mmap.base)
2dd6d8a1
AH
503 continue;
504
e035f4ca 505 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
2dd6d8a1
AH
506 rc = -1;
507 goto out;
508 }
509 }
510out:
511 return rc;
512}
513
/*
 * Handle a requested AUX area snapshot: read all AUX mmaps, finish the
 * snapshot in the recorder backend, and re-arm (or error out) the
 * snapshot trigger accordingly.
 */
static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}
526
4b5ea3bd
AH
/*
 * Initialize AUX area tracing: create the recorder backend (if not
 * already present), parse snapshot options, then parse auxtrace filters.
 * Returns 0 on success or a negative error.
 */
static int record__auxtrace_init(struct record *rec)
{
	int err;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}
544
e31f0d01
AH
545#else
546
/*
 * No-op fallbacks used when perf is built without AUX area tracing
 * support (HAVE_AUXTRACE_SUPPORT undefined).
 */
static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct perf_mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}
569
2dd6d8a1
AH
570#endif
571
cda57a8c
WN
/*
 * mmap the ring buffers for @evlist with the configured sizes, aio depth
 * and affinity; sets up the cpu/node map first when a non-default
 * --affinity mode is selected.  On EPERM a hint about
 * perf_event_mlock_kb is printed.  Returns 0 or a negative errno.
 */
static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode,
				 opts->nr_cblocks, opts->affinity,
				 opts->mmap_flush) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}
605
606static int record__mmap(struct record *rec)
607{
608 return record__mmap_evlist(rec, rec->evlist);
609}
610
/*
 * Open all events of the session's evlist on the perf_event_open()
 * level, falling back to weaker configurations when possible, apply
 * event filters, and mmap the ring buffers.
 * Returns 0 on success or a negative error.
 */
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		/* The dummy tracks; real events enable on exec. */
		pos = perf_evlist__first(evlist);
		pos->tracking = 0;
		pos = perf_evlist__last(evlist);
		pos->tracking = 1;
		pos->attr.enable_on_exec = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			/* Try a degraded config (e.g. drop unsupported bits). */
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			/* Weak-group member failed: break up the group and retry. */
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->leader != pos &&
			    pos->weak_group) {
				pos = perf_evlist__reset_weak_group(evlist, pos);
				goto try_again;
			}
			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
679
e3d59112
NK
/*
 * Sample callback used while post-processing the recorded data for
 * build-ids: track first/last sample timestamps; unless --buildid-all
 * (which marks every DSO anyway), mark the DSO hit by this sample.
 */
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample->time;

	rec->evlist->last_sample_time = sample->time;

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}
699
/*
 * Re-process the freshly recorded data to collect build-ids of the DSOs
 * that were hit (or, with --buildid-all, of all DSOs).
 * Returns 0 on success or a negative error.
 */
static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than build-id path (in debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSO regardless of hits,
	 * so no need to process samples. But if timestamp_boundary is enabled,
	 * it still needs to walk on all samples to get the timestamps of
	 * first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}
729
/*
 * Synthesize module and kernel mmap events for one guest machine so that
 * guest-side addresses can be resolved at report time.  @data is the
 * perf_tool that routes the synthesized events to the output.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX.  This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
758
98402807
FW
/* Header-only event written after each full pass over the ring buffers. */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
763
f13de660
AB
/*
 * With --affinity=node/cpu, migrate this thread to the mask associated
 * with the ring buffer it is about to read, if it differs from the
 * current one; the new mask is cached in rec->affinity_mask.
 */
static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
		CPU_ZERO(&rec->affinity_mask);
		CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
		sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
	}
}
773
/*
 * Drain every ring buffer of @evlist (the normal maps, or the overwrite
 * maps when @overwrite) into the output file, via either the synchronous
 * push path or the aio path.  With @synch, each map's flush threshold is
 * temporarily forced to 1 so everything is flushed.  A
 * PERF_RECORD_FINISHED_ROUND marker is written if any data was emitted.
 * Returns 0 on success, -1 on error.
 */
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;
	int trace_fd = rec->data.file.fd;
	off_t off;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	/* Overwrite buffers are only read while paused with pending data. */
	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < evlist->nr_mmaps; i++) {
		u64 flush = 0;
		struct perf_mmap *map = &maps[i];

		if (map->base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				/* Force a full flush; restored below. */
				flush = map->flush;
				map->flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) != 0) {
					if (synch)
						map->flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				int idx;
				/*
				 * Call record__aio_sync() to wait till map->data buffer
				 * becomes available after previous aio write request.
				 */
				idx = record__aio_sync(map, false);
				if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}
855
470530bb 856static int record__mmap_read_all(struct record *rec, bool synch)
cb21686b
WN
857{
858 int err;
859
470530bb 860 err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
cb21686b
WN
861 if (err)
862 return err;
863
470530bb 864 return record__mmap_read_evlist(rec, rec->evlist, true, synch);
cb21686b
WN
865}
866
/*
 * Start with every perf.data header feature enabled, then clear the ones
 * that do not apply to this session's options/events.
 */
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	/* Directory output is never produced by 'record' itself. */
	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}
894
e1ab48ba
WN
/*
 * Finalize a (non-pipe) output file: record the data size, collect
 * build-ids unless disabled, and rewrite the header in place.
 */
static void
record__finish_output(struct record *rec)
{
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}
917
/*
 * Synthesize thread-map events (comm/mmap) for the forked workload
 * process.  Runs only when @tail matches opts.tail_synthesize, so it
 * executes at most once, at the configured point (start or tail).
 * Returns 0 on success or skip, -1 on failure to build the thread map.
 */
static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address);
	thread_map__put(thread_map);
	return err;
}
937
4ea648ae 938static int record__synthesize(struct record *rec, bool tail);
3c1cb7e3 939
ecfd7a9c
WN
/*
 * Rotate the output file (--switch-output): drain pending aio writes,
 * synthesize tail events, finalize the current file, then switch
 * perf_data to a new timestamped file.  When a bounded number of files
 * is kept (--switch-output num_files), the oldest one in the ring is
 * removed.  Returns the new fd (>= 0) or a negative error.
 */
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same Size: "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
				    rec->session->header.data_offset,
				    at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		/* Fresh file: restart byte accounting. */
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		/* Ring of kept files: drop the one being overwritten. */
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			free(rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist. Which causes newly created perf.data doesn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}
1009
f33cbe72
ACM
/* errno reported by a failed workload exec, delivered via SIGUSR1. */
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}
1025
2dd6d8a1 1026static void snapshot_sig_handler(int sig);
bfacbe3b 1027static void alarm_sig_handler(int sig);
2dd6d8a1 1028
46bc29b9
AH
/*
 * Weak default for TSC time-conversion event synthesis; arch code may
 * override it.  The default emits nothing and succeeds.
 */
int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}
1037
ee667f94
WN
1038static const struct perf_event_mmap_page *
1039perf_evlist__pick_pc(struct perf_evlist *evlist)
1040{
b2cb615d
WN
1041 if (evlist) {
1042 if (evlist->mmap && evlist->mmap[0].base)
1043 return evlist->mmap[0].base;
0b72d69a
WN
1044 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
1045 return evlist->overwrite_mmap[0].base;
b2cb615d 1046 }
ee667f94
WN
1047 return NULL;
1048}
1049
c45628b0
WN
1050static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1051{
ee667f94
WN
1052 const struct perf_event_mmap_page *pc;
1053
1054 pc = perf_evlist__pick_pc(rec->evlist);
1055 if (pc)
1056 return pc;
c45628b0
WN
1057 return NULL;
1058}
1059
4ea648ae 1060static int record__synthesize(struct record *rec, bool tail)
c45c86eb
WN
1061{
1062 struct perf_session *session = rec->session;
1063 struct machine *machine = &session->machines.host;
8ceb41d7 1064 struct perf_data *data = &rec->data;
c45c86eb
WN
1065 struct record_opts *opts = &rec->opts;
1066 struct perf_tool *tool = &rec->tool;
8ceb41d7 1067 int fd = perf_data__fd(data);
c45c86eb
WN
1068 int err = 0;
1069
4ea648ae
WN
1070 if (rec->opts.tail_synthesize != tail)
1071 return 0;
1072
8ceb41d7 1073 if (data->is_pipe) {
a2015516
JO
1074 /*
1075 * We need to synthesize events first, because some
1076 * features works on top of them (on report side).
1077 */
318ec184 1078 err = perf_event__synthesize_attrs(tool, rec->evlist,
c45c86eb
WN
1079 process_synthesized_event);
1080 if (err < 0) {
1081 pr_err("Couldn't synthesize attrs.\n");
1082 goto out;
1083 }
1084
a2015516
JO
1085 err = perf_event__synthesize_features(tool, session, rec->evlist,
1086 process_synthesized_event);
1087 if (err < 0) {
1088 pr_err("Couldn't synthesize features.\n");
1089 return err;
1090 }
1091
c45c86eb
WN
1092 if (have_tracepoints(&rec->evlist->entries)) {
1093 /*
1094 * FIXME err <= 0 here actually means that
1095 * there were no tracepoints so its not really
1096 * an error, just that we don't need to
1097 * synthesize anything. We really have to
1098 * return this more properly and also
1099 * propagate errors that now are calling die()
1100 */
1101 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
1102 process_synthesized_event);
1103 if (err <= 0) {
1104 pr_err("Couldn't record tracing data.\n");
1105 goto out;
1106 }
1107 rec->bytes_written += err;
1108 }
1109 }
1110
c45628b0 1111 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
46bc29b9
AH
1112 process_synthesized_event, machine);
1113 if (err)
1114 goto out;
1115
c45c86eb
WN
1116 if (rec->opts.full_auxtrace) {
1117 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1118 session, process_synthesized_event);
1119 if (err)
1120 goto out;
1121 }
1122
6c443954
ACM
1123 if (!perf_evlist__exclude_kernel(rec->evlist)) {
1124 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1125 machine);
1126 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1127 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1128 "Check /proc/kallsyms permission or run as root.\n");
1129
1130 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1131 machine);
1132 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1133 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1134 "Check /proc/modules permission or run as root.\n");
1135 }
c45c86eb
WN
1136
1137 if (perf_guest) {
1138 machines__process_guests(&session->machines,
1139 perf_event__synthesize_guest_os, tool);
1140 }
1141
bfd8f72c
AK
1142 err = perf_event__synthesize_extra_attr(&rec->tool,
1143 rec->evlist,
1144 process_synthesized_event,
1145 data->is_pipe);
1146 if (err)
1147 goto out;
1148
373565d2
AK
1149 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
1150 process_synthesized_event,
1151 NULL);
1152 if (err < 0) {
1153 pr_err("Couldn't synthesize thread map.\n");
1154 return err;
1155 }
1156
1157 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
1158 process_synthesized_event, NULL);
1159 if (err < 0) {
1160 pr_err("Couldn't synthesize cpu map.\n");
1161 return err;
1162 }
1163
e5416950 1164 err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
7b612e29
SL
1165 machine, opts);
1166 if (err < 0)
1167 pr_warning("Couldn't synthesize bpf events.\n");
1168
c45c86eb
WN
1169 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
1170 process_synthesized_event, opts->sample_address,
3fcb10e4 1171 1);
c45c86eb
WN
1172out:
1173 return err;
1174}
1175
/*
 * The record command proper: set up signals, the session and the
 * workload, open and enable the events, run the poll/dump loop until
 * done, then tear everything down and print the summary.
 *
 * Returns the workload's exit status when one was forked, otherwise 0
 * on success or a negative error.
 *
 * The statement order here is load-bearing (signal setup before fork,
 * header written before buildids checked, synthesis before enabling,
 * etc.) — do not reorder casually.
 */
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;	/* times we woke up to dump data */
	const bool forks = argc > 0;	/* remaining argv is a workload to run */
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	struct perf_evlist *sb_evlist = NULL;
	int fd;
	float ratio = 0;

	/* termination/crash handlers; SIGUSR2 drives snapshot/switch-output */
	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data__fd(data);
	rec->session = session;

	record__init_features(rec);

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;

	/* fork the workload now (stopped); it is kicked off further down */
	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just single event and are sending data
	 * through pipe, we need to force the ids allocation,
	 * because we synthesize event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/* pipe output gets a minimal header; files get the full one */
	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	/* side-band thread collects bpf events while we record */
	if (!opts->no_bpf_event)
		bpf_event__add_sb_event(&sb_evlist, &session->header.env);

	if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	/* main loop: dump ring buffers, handle snapshots/output switching */
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state is possible to be
		 * BKW_MMAP_EMPTY here: when done == true and
		 * hits != rec->samples in previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensure we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec, false) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 raise after or during record__mmap_read_all(),
			 * record__mmap_read_all() didn't collect data from
			 * overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		/* no new samples this round: block in poll or wind down */
		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	/* final synchronous flush of the ring buffers and any AIO writes */
	record__mmap_read_all(rec, true);
	record__aio_mmap_read_sync(rec);

	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
		session->header.env.comp_ratio = ratio + 0.5;
	}

	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	/* tail-synthesize pass (no-op unless --tail-synthesize was given) */
	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->path, postfix, samples);
		if (ratio) {
			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
				rec->session->bytes_transferred / 1024.0 / 1024.0,
				ratio);
		}
		fprintf(stderr, " ]\n");
	}

out_delete_session:
	perf_session__delete(session);

	if (!opts->no_bpf_event)
		perf_evlist__stop_sb_thread(sb_evlist);
	return status;
}
0e9b20b8 1564
/* Log (at debug verbosity) the configured callchain mode and, for DWARF,
 * the user stack dump size. */
static void callchain_debug(struct callchain_param *callchain)
{
	/* indexed by enum value; must stay in sync with CALLCHAIN_* order */
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}
1575
0883e820
ACM
1576int record_opts__parse_callchain(struct record_opts *record,
1577 struct callchain_param *callchain,
1578 const char *arg, bool unset)
09b0fd45 1579{
09b0fd45 1580 int ret;
0883e820 1581 callchain->enabled = !unset;
eb853e80 1582
09b0fd45
JO
1583 /* --no-call-graph */
1584 if (unset) {
0883e820 1585 callchain->record_mode = CALLCHAIN_NONE;
09b0fd45
JO
1586 pr_debug("callchain: disabled\n");
1587 return 0;
1588 }
1589
0883e820 1590 ret = parse_callchain_record_opt(arg, callchain);
5c0cf224
JO
1591 if (!ret) {
1592 /* Enable data address sampling for DWARF unwind. */
0883e820 1593 if (callchain->record_mode == CALLCHAIN_DWARF)
5c0cf224 1594 record->sample_address = true;
0883e820 1595 callchain_debug(callchain);
5c0cf224 1596 }
26d33022
JO
1597
1598 return ret;
1599}
1600
0883e820
ACM
/* Option callback for --call-graph: forwards to the generic parser with
 * the global callchain_param. */
int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}
1607
c421e80b 1608int record_callchain_opt(const struct option *opt,
09b0fd45
JO
1609 const char *arg __maybe_unused,
1610 int unset __maybe_unused)
1611{
2ddd5c04 1612 struct callchain_param *callchain = opt->value;
c421e80b 1613
2ddd5c04 1614 callchain->enabled = true;
09b0fd45 1615
2ddd5c04
ACM
1616 if (callchain->record_mode == CALLCHAIN_NONE)
1617 callchain->record_mode = CALLCHAIN_FP;
eb853e80 1618
2ddd5c04 1619 callchain_debug(callchain);
09b0fd45
JO
1620 return 0;
1621}
1622
eb853e80
JO
1623static int perf_record_config(const char *var, const char *value, void *cb)
1624{
7a29c087
NK
1625 struct record *rec = cb;
1626
1627 if (!strcmp(var, "record.build-id")) {
1628 if (!strcmp(value, "cache"))
1629 rec->no_buildid_cache = false;
1630 else if (!strcmp(value, "no-cache"))
1631 rec->no_buildid_cache = true;
1632 else if (!strcmp(value, "skip"))
1633 rec->no_buildid = true;
1634 else
1635 return -1;
1636 return 0;
1637 }
cff17205
YX
1638 if (!strcmp(var, "record.call-graph")) {
1639 var = "call-graph.record-mode";
1640 return perf_default_config(var, value, cb);
1641 }
93f20c0f
AB
1642#ifdef HAVE_AIO_SUPPORT
1643 if (!strcmp(var, "record.aio")) {
1644 rec->opts.nr_cblocks = strtol(value, NULL, 0);
1645 if (!rec->opts.nr_cblocks)
1646 rec->opts.nr_cblocks = nr_cblocks_default;
1647 }
1648#endif
eb853e80 1649
cff17205 1650 return 0;
eb853e80
JO
1651}
1652
814c8c38
PZ
/* Maps a user-facing clock name (for -k/--clockid) to its clockid_t value;
 * consumed by parse_clockid() below. */
struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

/* table terminator: a NULL name ends the scan */
#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};
1695
cf790516
AB
1696static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1697{
1698 struct timespec res;
1699
1700 *res_ns = 0;
1701 if (!clock_getres(clk_id, &res))
1702 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1703 else
1704 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1705
1706 return 0;
1707}
1708
814c8c38
PZ
/*
 * Option callback for -k/--clockid. Accepts a raw numeric clockid, a
 * name from the clockids[] table, or either with a "CLOCK_" prefix.
 * On success sets opts->use_clockid/clockid and records the clock's
 * resolution; returns -1 on an unknown name or a repeated option.
 */
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;	/* keep the original spelling for the warning */

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if its a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return get_clockid_res(opts->clockid,
					       &opts->clockid_res_ns);
		}
	}

	/* nothing matched: undo the tentative enable and complain */
	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}
1750
f4fe11b7
AB
1751static int record__parse_affinity(const struct option *opt, const char *str, int unset)
1752{
1753 struct record_opts *opts = (struct record_opts *)opt->value;
1754
1755 if (unset || !str)
1756 return 0;
1757
1758 if (!strcasecmp(str, "node"))
1759 opts->affinity = PERF_AFFINITY_NODE;
1760 else if (!strcasecmp(str, "cpu"))
1761 opts->affinity = PERF_AFFINITY_CPU;
1762
1763 return 0;
1764}
1765
e9db1310
AH
1766static int record__parse_mmap_pages(const struct option *opt,
1767 const char *str,
1768 int unset __maybe_unused)
1769{
1770 struct record_opts *opts = opt->value;
1771 char *s, *p;
1772 unsigned int mmap_pages;
1773 int ret;
1774
1775 if (!str)
1776 return -EINVAL;
1777
1778 s = strdup(str);
1779 if (!s)
1780 return -ENOMEM;
1781
1782 p = strchr(s, ',');
1783 if (p)
1784 *p = '\0';
1785
1786 if (*s) {
1787 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1788 if (ret)
1789 goto out_free;
1790 opts->mmap_pages = mmap_pages;
1791 }
1792
1793 if (!p) {
1794 ret = 0;
1795 goto out_free;
1796 }
1797
1798 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1799 if (ret)
1800 goto out_free;
1801
1802 opts->auxtrace_mmap_pages = mmap_pages;
1803
1804out_free:
1805 free(s);
1806 return ret;
1807}
1808
0c582449
JO
1809static void switch_output_size_warn(struct record *rec)
1810{
1811 u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1812 struct switch_output *s = &rec->switch_output;
1813
1814 wakeup_size /= 2;
1815
1816 if (s->size < wakeup_size) {
1817 char buf[100];
1818
1819 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1820 pr_warning("WARNING: switch-output data size lower than "
1821 "wakeup kernel buffer size (%s) "
1822 "expect bigger perf.data sizes\n", buf);
1823 }
1824}
1825
cb4e1ebb
JO
/*
 * Interpret the --switch-output argument stored in rec->switch_output.str.
 * Accepted forms: "signal" (rotate on SIGUSR2), a size with B/K/M/G
 * suffix, or a time with s/m/h/d suffix. On success enables switching
 * and timestamped file names; returns -1 when the string matches none
 * of the forms.
 */
static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	/* option not given at all */
	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	/* try a size threshold first, then a time threshold */
	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	/* rotated output only makes sense with per-switch file names */
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}
1880
/* Usage strings shown by parse_options(); exported via record_usage for
 * builtin-script's reuse of the record options. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;
0e9b20b8 1887
d20deb64 1888/*
8c6f45a7
ACM
1889 * XXX Ideally would be local to cmd_record() and passed to a record__new
1890 * because we need to have access to it in record__exit, that is called
d20deb64
ACM
1891 * after cmd_record() exits, but since record_options need to be accessible to
1892 * builtin-script, leave it here.
1893 *
1894 * At least we don't ouch it in all the other functions here directly.
1895 *
1896 * Just say no to tons of global variables, sigh.
1897 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		/* UINT_MAX/ULLONG_MAX mean "not set by the user" here */
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,	/* default sampling frequency */
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.mmap_flush          = MMAP_FLUSH_DEFAULT,
	},
	/* event processors used when perf record itself reads the data,
	 * e.g. for build-id collection */
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};
7865e817 1922
76a26549
NK
/* Help text for --call-graph, shared with other builtins. */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/* set by --dry-run: parse everything but don't actually record */
static bool dry_run;
1927
d20deb64
ACM
1928/*
1929 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1930 * with it and switch to use the library functions in perf_evlist that came
b4006796 1931 * from builtin-record.c, i.e. use record_opts,
d20deb64
ACM
1932 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1933 * using pipes, etc.
1934 */
efd21307 1935static struct option __record_options[] = {
d20deb64 1936 OPT_CALLBACK('e', "event", &record.evlist, "event",
86847b62 1937 "event selector. use 'perf list' to list available events",
f120f9d5 1938 parse_events_option),
d20deb64 1939 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
c171b552 1940 "event filter", parse_filter),
4ba1faa1
WN
1941 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1942 NULL, "don't record events from perf itself",
1943 exclude_perf),
bea03405 1944 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
d6d901c2 1945 "record events on existing process id"),
bea03405 1946 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
d6d901c2 1947 "record events on existing thread id"),
d20deb64 1948 OPT_INTEGER('r', "realtime", &record.realtime_prio,
0e9b20b8 1949 "collect data with this RT SCHED_FIFO priority"),
509051ea 1950 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
acac03fa 1951 "collect data without buffering"),
d20deb64 1952 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
daac07b2 1953 "collect raw sample records from all opened counters"),
bea03405 1954 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
0e9b20b8 1955 "system-wide collection from all CPUs"),
bea03405 1956 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
c45c6ea2 1957 "list of cpus to monitor"),
d20deb64 1958 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
2d4f2799 1959 OPT_STRING('o', "output", &record.data.path, "file",
abaff32a 1960 "output file name"),
69e7e5b0
AH
1961 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1962 &record.opts.no_inherit_set,
1963 "child tasks do not inherit counters"),
4ea648ae
WN
1964 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1965 "synthesize non-sample events at the end of output"),
626a6b78 1966 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
71184c6a 1967 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
b09c2364
ACM
1968 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1969 "Fail if the specified frequency can't be used"),
67230479
ACM
1970 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
1971 "profile at this frequency",
1972 record__parse_freq),
e9db1310
AH
1973 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1974 "number of mmap data pages and AUX area tracing mmap pages",
1975 record__parse_mmap_pages),
470530bb
AB
1976 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
1977 "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
1978 record__mmap_flush_parse),
d20deb64 1979 OPT_BOOLEAN(0, "group", &record.opts.group,
43bece79 1980 "put the counters into a counter group"),
2ddd5c04 1981 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
09b0fd45
JO
1982 NULL, "enables call-graph recording" ,
1983 &record_callchain_opt),
1984 OPT_CALLBACK(0, "call-graph", &record.opts,
76a26549 1985 "record_mode[,record_size]", record_callchain_help,
09b0fd45 1986 &record_parse_callchain_opt),
c0555642 1987 OPT_INCR('v', "verbose", &verbose,
3da297a6 1988 "be more verbose (show counter open errors, etc)"),
b44308f5 1989 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
d20deb64 1990 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
649c48a9 1991 "per thread counts"),
56100321 1992 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3b0a5daa
KL
1993 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1994 "Record the sample physical addresses"),
b6f35ed7 1995 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3abebc55
AH
1996 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1997 &record.opts.sample_time_set,
1998 "Record the sample timestamps"),
f290aa1f
JO
1999 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
2000 "Record the sample period"),
d20deb64 2001 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
649c48a9 2002 "don't sample"),
d2db9a98
WN
2003 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
2004 &record.no_buildid_cache_set,
2005 "do not update the buildid cache"),
2006 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
2007 &record.no_buildid_set,
2008 "do not collect buildids in perf.data"),
d20deb64 2009 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
023695d9
SE
2010 "monitor event in cgroup name only",
2011 parse_cgroups),
a6205a35 2012 OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
6619a53e 2013 "ms to wait before starting measurement after program start"),
bea03405
NK
2014 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
2015 "user to profile"),
a5aabdac
SE
2016
2017 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
2018 "branch any", "sample any taken branches",
2019 parse_branch_stack),
2020
2021 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
2022 "branch filter mask", "branch stack filter modes",
bdfebd84 2023 parse_branch_stack),
05484298
AK
2024 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
2025 "sample by weight (on special events only)"),
475eeab9
AK
2026 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
2027 "sample transaction flags (special events only)"),
3aa5939d
AH
2028 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
2029 "use per-thread mmaps"),
bcc84ec6
SE
2030 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
2031 "sample selected machine registers on interrupt,"
8e5bc76f 2032 " use '-I?' to list register names", parse_regs),
84c41742
AK
2033 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
2034 "sample selected machine registers on interrupt,"
8e5bc76f 2035 " use '-I?' to list register names", parse_regs),
85c273d2
AK
2036 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
2037 "Record running/enabled time of read (:S) events"),
814c8c38
PZ
2038 OPT_CALLBACK('k', "clockid", &record.opts,
2039 "clockid", "clockid to use for events, see clock_gettime()",
2040 parse_clockid),
2dd6d8a1
AH
2041 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
2042 "opts", "AUX area tracing Snapshot Mode", ""),
3fcb10e4 2043 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
9d9cad76 2044 "per thread proc mmap processing timeout in ms"),
f3b3614a
HB
2045 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
2046 "Record namespaces events"),
b757bb09
AH
2047 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
2048 "Record context switch events"),
85723885
JO
2049 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
2050 "Configure all used events to run in kernel space.",
2051 PARSE_OPT_EXCLUSIVE),
2052 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
2053 "Configure all used events to run in user space.",
2054 PARSE_OPT_EXCLUSIVE),
71dc2326
WN
2055 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
2056 "clang binary to use for compiling BPF scriptlets"),
2057 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
2058 "options passed to clang when compiling BPF scriptlets"),
7efe0e03
HK
2059 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2060 "file", "vmlinux pathname"),
6156681b
NK
2061 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
2062 "Record build-id of all DSOs regardless of hits"),
ecfd7a9c
WN
2063 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2064 "append timestamp to output filename"),
68588baf
JY
2065 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2066 "Record timestamp boundary (time of first/last samples)"),
cb4e1ebb 2067 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
c38dab7d
AK
2068 &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2069 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
dc0c6127 2070 "signal"),
03724b2e
AK
2071 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2072 "Limit number of switch output generated files"),
0aab2136
WN
2073 OPT_BOOLEAN(0, "dry-run", &dry_run,
2074 "Parse options then exit"),
d3d1af6f 2075#ifdef HAVE_AIO_SUPPORT
93f20c0f
AB
2076 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2077 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
d3d1af6f
AB
2078 record__aio_parse),
2079#endif
f4fe11b7
AB
2080 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2081 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2082 record__parse_affinity),
0e9b20b8
IM
2083 OPT_END()
2084};
2085
e5b2c207
NK
2086struct option *record_options = __record_options;
2087
b0ad8ea6 2088int cmd_record(int argc, const char **argv)
0e9b20b8 2089{
ef149c25 2090 int err;
8c6f45a7 2091 struct record *rec = &record;
16ad2ffb 2092 char errbuf[BUFSIZ];
0e9b20b8 2093
67230479
ACM
2094 setlocale(LC_ALL, "");
2095
48e1cab1
WN
2096#ifndef HAVE_LIBBPF_SUPPORT
2097# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2098 set_nobuild('\0', "clang-path", true);
2099 set_nobuild('\0', "clang-opt", true);
2100# undef set_nobuild
7efe0e03
HK
2101#endif
2102
2103#ifndef HAVE_BPF_PROLOGUE
2104# if !defined (HAVE_DWARF_SUPPORT)
2105# define REASON "NO_DWARF=1"
2106# elif !defined (HAVE_LIBBPF_SUPPORT)
2107# define REASON "NO_LIBBPF=1"
2108# else
2109# define REASON "this architecture doesn't support BPF prologue"
2110# endif
2111# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2112 set_nobuild('\0', "vmlinux", true);
2113# undef set_nobuild
2114# undef REASON
48e1cab1
WN
2115#endif
2116
9d2ed645
AB
2117 CPU_ZERO(&rec->affinity_mask);
2118 rec->opts.affinity = PERF_AFFINITY_SYS;
2119
3e2be2da
ACM
2120 rec->evlist = perf_evlist__new();
2121 if (rec->evlist == NULL)
361c99a6
ACM
2122 return -ENOMEM;
2123
ecc4c561
ACM
2124 err = perf_config(perf_record_config, rec);
2125 if (err)
2126 return err;
eb853e80 2127
bca647aa 2128 argc = parse_options(argc, argv, record_options, record_usage,
655000e7 2129 PARSE_OPT_STOP_AT_NON_OPTION);
68ba3235
NK
2130 if (quiet)
2131 perf_quiet_option();
483635a9
JO
2132
2133 /* Make system wide (-a) the default target. */
602ad878 2134 if (!argc && target__none(&rec->opts.target))
483635a9 2135 rec->opts.target.system_wide = true;
0e9b20b8 2136
bea03405 2137 if (nr_cgroups && !rec->opts.target.system_wide) {
c7118369
NK
2138 usage_with_options_msg(record_usage, record_options,
2139 "cgroup monitoring only available in system-wide mode");
2140
023695d9 2141 }
b757bb09
AH
2142 if (rec->opts.record_switch_events &&
2143 !perf_can_record_switch_events()) {
c7118369
NK
2144 ui__error("kernel does not support recording context switch events\n");
2145 parse_options_usage(record_usage, record_options, "switch-events", 0);
2146 return -EINVAL;
b757bb09 2147 }
023695d9 2148
cb4e1ebb
JO
2149 if (switch_output_setup(rec)) {
2150 parse_options_usage(record_usage, record_options, "switch-output", 0);
2151 return -EINVAL;
2152 }
2153
bfacbe3b
JO
2154 if (rec->switch_output.time) {
2155 signal(SIGALRM, alarm_sig_handler);
2156 alarm(rec->switch_output.time);
2157 }
2158
03724b2e
AK
2159 if (rec->switch_output.num_files) {
2160 rec->switch_output.filenames = calloc(sizeof(char *),
2161 rec->switch_output.num_files);
2162 if (!rec->switch_output.filenames)
2163 return -EINVAL;
2164 }
2165
1b36c03e
AH
2166 /*
2167 * Allow aliases to facilitate the lookup of symbols for address
2168 * filters. Refer to auxtrace_parse_filters().
2169 */
2170 symbol_conf.allow_aliases = true;
2171
2172 symbol__init(NULL);
2173
4b5ea3bd 2174 err = record__auxtrace_init(rec);
1b36c03e
AH
2175 if (err)
2176 goto out;
2177
0aab2136 2178 if (dry_run)
5c01ad60 2179 goto out;
0aab2136 2180
d7888573
WN
2181 err = bpf__setup_stdout(rec->evlist);
2182 if (err) {
2183 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2184 pr_err("ERROR: Setup BPF stdout failed: %s\n",
2185 errbuf);
5c01ad60 2186 goto out;
d7888573
WN
2187 }
2188
ef149c25
AH
2189 err = -ENOMEM;
2190
6c443954 2191 if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
646aaea6
ACM
2192 pr_warning(
2193"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
2194"check /proc/sys/kernel/kptr_restrict.\n\n"
2195"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
2196"file is not found in the buildid cache or in the vmlinux path.\n\n"
2197"Samples in kernel modules won't be resolved at all.\n\n"
2198"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
2199"even with a suitable vmlinux or kallsyms file.\n\n");
ec80fde7 2200
0c1d46a8 2201 if (rec->no_buildid_cache || rec->no_buildid) {
a1ac1d3c 2202 disable_buildid_cache();
dc0c6127 2203 } else if (rec->switch_output.enabled) {
0c1d46a8
WN
2204 /*
2205 * In 'perf record --switch-output', disable buildid
2206 * generation by default to reduce data file switching
2207 * overhead. Still generate buildid if they are required
2208 * explicitly using
2209 *
60437ac0 2210 * perf record --switch-output --no-no-buildid \
0c1d46a8
WN
2211 * --no-no-buildid-cache
2212 *
2213 * Following code equals to:
2214 *
2215 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2216 * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2217 * disable_buildid_cache();
2218 */
2219 bool disable = true;
2220
2221 if (rec->no_buildid_set && !rec->no_buildid)
2222 disable = false;
2223 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2224 disable = false;
2225 if (disable) {
2226 rec->no_buildid = true;
2227 rec->no_buildid_cache = true;
2228 disable_buildid_cache();
2229 }
2230 }
655000e7 2231
4ea648ae
WN
2232 if (record.opts.overwrite)
2233 record.opts.tail_synthesize = true;
2234
3e2be2da 2235 if (rec->evlist->nr_entries == 0 &&
4b4cd503 2236 __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
69aad6f1 2237 pr_err("Not enough memory for event selector list\n");
394c01ed 2238 goto out;
bbd36e5e 2239 }
0e9b20b8 2240
69e7e5b0
AH
2241 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2242 rec->opts.no_inherit = true;
2243
602ad878 2244 err = target__validate(&rec->opts.target);
16ad2ffb 2245 if (err) {
602ad878 2246 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
c3dec27b 2247 ui__warning("%s\n", errbuf);
16ad2ffb
NK
2248 }
2249
602ad878 2250 err = target__parse_uid(&rec->opts.target);
16ad2ffb
NK
2251 if (err) {
2252 int saved_errno = errno;
4bd0f2d2 2253
602ad878 2254 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3780f488 2255 ui__error("%s", errbuf);
16ad2ffb
NK
2256
2257 err = -saved_errno;
394c01ed 2258 goto out;
16ad2ffb 2259 }
0d37aa34 2260
ca800068
MZ
2261 /* Enable ignoring missing threads when -u/-p option is defined. */
2262 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
23dc4f15 2263
16ad2ffb 2264 err = -ENOMEM;
3e2be2da 2265 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
dd7927f4 2266 usage_with_options(record_usage, record_options);
69aad6f1 2267
ef149c25
AH
2268 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2269 if (err)
394c01ed 2270 goto out;
ef149c25 2271
6156681b
NK
2272 /*
2273 * We take all buildids when the file contains
2274 * AUX area tracing data because we do not decode the
2275 * trace because it would take too long.
2276 */
2277 if (rec->opts.full_auxtrace)
2278 rec->buildid_all = true;
2279
b4006796 2280 if (record_opts__config(&rec->opts)) {
39d17dac 2281 err = -EINVAL;
394c01ed 2282 goto out;
7e4ff9e3
MG
2283 }
2284
93f20c0f
AB
2285 if (rec->opts.nr_cblocks > nr_cblocks_max)
2286 rec->opts.nr_cblocks = nr_cblocks_max;
d3d1af6f
AB
2287 if (verbose > 0)
2288 pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2289
9d2ed645 2290 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
470530bb 2291 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
9d2ed645 2292
d20deb64 2293 err = __cmd_record(&record, argc, argv);
394c01ed 2294out:
45604710 2295 perf_evlist__delete(rec->evlist);
d65a458b 2296 symbol__exit();
ef149c25 2297 auxtrace_record__free(rec->itr);
39d17dac 2298 return err;
0e9b20b8 2299}
2dd6d8a1
AH
2300
2301static void snapshot_sig_handler(int sig __maybe_unused)
2302{
dc0c6127
JO
2303 struct record *rec = &record;
2304
5f9cf599
WN
2305 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2306 trigger_hit(&auxtrace_snapshot_trigger);
2307 auxtrace_record__snapshot_started = 1;
2308 if (auxtrace_record__snapshot_start(record.itr))
2309 trigger_error(&auxtrace_snapshot_trigger);
2310 }
3c1cb7e3 2311
dc0c6127 2312 if (switch_output_signal(rec))
3c1cb7e3 2313 trigger_hit(&switch_output_trigger);
2dd6d8a1 2314}
bfacbe3b
JO
2315
2316static void alarm_sig_handler(int sig __maybe_unused)
2317{
2318 struct record *rec = &record;
2319
2320 if (switch_output_time(rec))
2321 trigger_hit(&switch_output_trigger);
2322}