git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git / blame - tools/perf/builtin-stat.c
perf stat record: Synthesize event update events
ddcacfa0 1/*
bf9e1876
IM
2 * builtin-stat.c
3 *
4 * Builtin stat command: Give a precise performance counter summary
5 * overview of any workload, CPU or specific PID.
6 *
7 * Sample output:
ddcacfa0 8
2cba3ffb 9 $ perf stat ./hackbench 10
ddcacfa0 10
2cba3ffb 11 Time: 0.118
ddcacfa0 12
2cba3ffb 13 Performance counter stats for './hackbench 10':
ddcacfa0 14
2cba3ffb
IM
15 1708.761321 task-clock # 11.037 CPUs utilized
16 41,190 context-switches # 0.024 M/sec
17 6,735 CPU-migrations # 0.004 M/sec
18 17,318 page-faults # 0.010 M/sec
19 5,205,202,243 cycles # 3.046 GHz
20 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle
21 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle
22 2,603,501,247 instructions # 0.50 insns per cycle
23 # 1.48 stalled cycles per insn
24 484,357,498 branches # 283.455 M/sec
25 6,388,934 branch-misses # 1.32% of all branches
26
27 0.154822978 seconds time elapsed
ddcacfa0 28
5242519b 29 *
2cba3ffb 30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
5242519b
IM
31 *
32 * Improvements and fixes by:
33 *
34 * Arjan van de Ven <arjan@linux.intel.com>
35 * Yanmin Zhang <yanmin.zhang@intel.com>
36 * Wu Fengguang <fengguang.wu@intel.com>
37 * Mike Galbraith <efault@gmx.de>
38 * Paul Mackerras <paulus@samba.org>
6e750a8f 39 * Jaswinder Singh Rajput <jaswinder@kernel.org>
5242519b
IM
40 *
41 * Released under the GPL v2. (and only v2, not any later version)
ddcacfa0
IM
42 */
43
1a482f38 44#include "perf.h"
16f762a2 45#include "builtin.h"
f14d5707 46#include "util/cgroup.h"
148be2c1 47#include "util/util.h"
4b6ab94e 48#include <subcmd/parse-options.h>
5242519b 49#include "util/parse-events.h"
4cabc3d1 50#include "util/pmu.h"
8f28827a 51#include "util/event.h"
361c99a6 52#include "util/evlist.h"
69aad6f1 53#include "util/evsel.h"
8f28827a 54#include "util/debug.h"
a5d243d0 55#include "util/color.h"
0007ecea 56#include "util/stat.h"
60666c63 57#include "util/header.h"
a12b51c4 58#include "util/cpumap.h"
d6d901c2 59#include "util/thread.h"
fd78260b 60#include "util/thread_map.h"
d809560b 61#include "util/counts.h"
4979d0c7 62#include "util/session.h"
ddcacfa0 63
1f16c575 64#include <stdlib.h>
ddcacfa0 65#include <sys/prctl.h>
5af52b51 66#include <locale.h>
16c8a109 67
d7470b6a 68#define DEFAULT_SEPARATOR " "
2cee77c4
DA
69#define CNTR_NOT_SUPPORTED "<not supported>"
70#define CNTR_NOT_COUNTED "<not counted>"
d7470b6a 71
d4f63a47 72static void print_counters(struct timespec *ts, int argc, const char **argv);
13370a9b 73
4cabc3d1 74/* Default events used for perf stat -T */
a454742c
JO
75static const char *transaction_attrs = {
76 "task-clock,"
4cabc3d1
AK
77 "{"
78 "instructions,"
79 "cycles,"
80 "cpu/cycles-t/,"
81 "cpu/tx-start/,"
82 "cpu/el-start/,"
83 "cpu/cycles-ct/"
84 "}"
85};
86
87/* More limited version when the CPU does not have all events. */
a454742c
JO
88static const char *transaction_limited_attrs = {
89 "task-clock,"
4cabc3d1
AK
90 "{"
91 "instructions,"
92 "cycles,"
93 "cpu/cycles-t/,"
94 "cpu/tx-start/"
95 "}"
96};
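/*
 * Editorial illustration (not part of the original source): with the
 * attribute strings above, "perf stat -T" behaves roughly as if the
 * user had passed
 *
 *   perf stat -e task-clock,{instructions,cycles,cpu/cycles-t/,cpu/tx-start/,cpu/el-start/,cpu/cycles-ct/}
 *
 * and add_default_attributes() falls back to transaction_limited_attrs
 * when the cpu PMU does not export the cycles-ct/el-start events.
 */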
97
666e6d48 98static struct perf_evlist *evsel_list;
361c99a6 99
602ad878 100static struct target target = {
77a6f014
NK
101 .uid = UINT_MAX,
102};
ddcacfa0 103
1e5a2931
JO
104typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);
105
3d632595 106static int run_count = 1;
2e6cdf99 107static bool no_inherit = false;
d07f0b12 108static volatile pid_t child_pid = -1;
c0555642 109static bool null_run = false;
2cba3ffb 110static int detailed_run = 0;
4cabc3d1 111static bool transaction_run;
201e0b06 112static bool big_num = true;
d7470b6a 113static int big_num_opt = -1;
d7470b6a
SE
114static const char *csv_sep = NULL;
115static bool csv_output = false;
43bece79 116static bool group = false;
1f16c575
PZ
117static const char *pre_cmd = NULL;
118static const char *post_cmd = NULL;
119static bool sync_run = false;
41191688 120static unsigned int initial_delay = 0;
410136f5 121static unsigned int unit_width = 4; /* strlen("unit") */
a7e191c3 122static bool forever = false;
13370a9b 123static struct timespec ref_time;
86ee6e18 124static struct cpu_map *aggr_map;
1e5a2931 125static aggr_get_id_t aggr_get_id;
e0547311
JO
126static bool append_file;
127static const char *output_name;
128static int output_fd;
5af52b51 129
4979d0c7
JO
130struct perf_stat {
131 bool record;
132 struct perf_data_file file;
133 struct perf_session *session;
134 u64 bytes_written;
135};
136
137static struct perf_stat perf_stat;
138#define STAT_RECORD perf_stat.record
139
60666c63
LW
140static volatile int done = 0;
141
421a50f3
JO
142static struct perf_stat_config stat_config = {
143 .aggr_mode = AGGR_GLOBAL,
711a572e 144 .scale = true,
421a50f3
JO
145};
146
13370a9b
SE
147static inline void diff_timespec(struct timespec *r, struct timespec *a,
148 struct timespec *b)
149{
150 r->tv_sec = a->tv_sec - b->tv_sec;
151 if (a->tv_nsec < b->tv_nsec) {
152 r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
153 r->tv_sec--;
154 } else {
155 r->tv_nsec = a->tv_nsec - b->tv_nsec;
156 }
157}
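/*
 * Worked example (editorial illustration, not in the original source):
 * a = { .tv_sec = 5, .tv_nsec = 100000000 } and
 * b = { .tv_sec = 3, .tv_nsec = 900000000 } give
 * r = { .tv_sec = 1, .tv_nsec = 200000000 }, because the nanosecond
 * field borrows one full second when a->tv_nsec < b->tv_nsec.
 */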
158
254ecbc7
JO
159static void perf_stat__reset_stats(void)
160{
161 perf_evlist__reset_stats(evsel_list);
f87027b9 162 perf_stat__reset_shadow_stats();
1eda3b21
JO
163}
164
cac21425 165static int create_perf_stat_counter(struct perf_evsel *evsel)
ddcacfa0 166{
69aad6f1 167 struct perf_event_attr *attr = &evsel->attr;
727ab04e 168
711a572e 169 if (stat_config.scale)
a21ca2ca
IM
170 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
171 PERF_FORMAT_TOTAL_TIME_RUNNING;
ddcacfa0 172
5d2cd909
ACM
173 attr->inherit = !no_inherit;
174
6acd8e92
JO
175 /*
176 * Some events get initialized with sample_(period/type) set,
177 * like tracepoints. Clear it up for counting.
178 */
179 attr->sample_period = 0;
4979d0c7
JO
180 /*
181 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
182 * while avoiding that older tools show confusing messages.
183 */
184 attr->sample_type = PERF_SAMPLE_IDENTIFIER;
6acd8e92 185
67ccdecd
JO
186 /*
187 * Disable all counters initially; they will be enabled
188 * either manually by us or by the kernel via enable_on_exec
189 * set later.
190 */
c8280cec 191 if (perf_evsel__is_group_leader(evsel)) {
67ccdecd
JO
192 attr->disabled = 1;
193
c8280cec
JO
194 /*
195 * In case of initial_delay we enable tracee
196 * events manually.
197 */
198 if (target__none(&target) && !initial_delay)
41191688 199 attr->enable_on_exec = 1;
ddcacfa0 200 }
084ab9f8 201
c8280cec
JO
202 if (target__has_cpu(&target))
203 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
204
594ac61a 205 return perf_evsel__open_per_thread(evsel, evsel_list->threads);
ddcacfa0
IM
206}
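/*
 * Editorial note (summary of the logic above, not in the original
 * source): for a forked workload with no --delay, the group leader is
 * created disabled with enable_on_exec set, so counting starts exactly
 * at exec() of the measured command; in all other cases the events are
 * switched on explicitly by enable_counters().
 */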
207
c04f5e5d
IM
208/*
209 * Does the counter have nsecs as a unit?
210 */
daec78a0 211static inline int nsec_counter(struct perf_evsel *evsel)
c04f5e5d 212{
daec78a0
ACM
213 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
214 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
c04f5e5d
IM
215 return 1;
216
217 return 0;
218}
219
8b99b1a4
JO
220static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
221 union perf_event *event,
222 struct perf_sample *sample __maybe_unused,
223 struct machine *machine __maybe_unused)
4979d0c7 224{
8b99b1a4 225 if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) {
4979d0c7
JO
226 pr_err("failed to write perf data, error: %m\n");
227 return -1;
228 }
229
8b99b1a4 230 perf_stat.bytes_written += event->header.size;
4979d0c7
JO
231 return 0;
232}
233
7aad0c32
JO
234static int write_stat_round_event(u64 time, u64 type)
235{
236 return perf_event__synthesize_stat_round(NULL, time, type,
237 process_synthesized_event,
238 NULL);
239}
240
241#define WRITE_STAT_ROUND_EVENT(time, interval) \
242 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
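/*
 * Editorial illustration of the token-pasting macro above:
 *
 *   WRITE_STAT_ROUND_EVENT(ns, INTERVAL)
 *     expands to write_stat_round_event(ns, PERF_STAT_ROUND_TYPE__INTERVAL)
 *
 * which is how process_interval() tags interim rounds and cmd_stat()
 * tags the FINAL round when recording.
 */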
243
5a6ea81b
JO
244#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
245
246static int
247perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
248 struct perf_counts_values *count)
249{
250 struct perf_sample_id *sid = SID(counter, cpu, thread);
251
252 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
253 process_synthesized_event, NULL);
254}
255
f5b4a9c3
SE
256/*
257 * Read out the results of a single counter:
258 * do not aggregate counts across CPUs in system-wide mode
259 */
c52b12ed 260static int read_counter(struct perf_evsel *counter)
f5b4a9c3 261{
9bf1a529
JO
262 int nthreads = thread_map__nr(evsel_list->threads);
263 int ncpus = perf_evsel__nr_cpus(counter);
264 int cpu, thread;
f5b4a9c3 265
3b4331d9
SP
266 if (!counter->supported)
267 return -ENOENT;
268
9bf1a529
JO
269 if (counter->system_wide)
270 nthreads = 1;
271
272 for (thread = 0; thread < nthreads; thread++) {
273 for (cpu = 0; cpu < ncpus; cpu++) {
3b3eb044
JO
274 struct perf_counts_values *count;
275
276 count = perf_counts(counter->counts, cpu, thread);
277 if (perf_evsel__read(counter, cpu, thread, count))
9bf1a529 278 return -1;
5a6ea81b
JO
279
280 if (STAT_RECORD) {
281 if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
282 pr_err("failed to write stat event\n");
283 return -1;
284 }
285 }
9bf1a529 286 }
f5b4a9c3 287 }
c52b12ed
ACM
288
289 return 0;
2996f5dd
IM
290}
291
5fc472a6 292static void read_counters(bool close_counters)
13370a9b 293{
13370a9b 294 struct perf_evsel *counter;
13370a9b 295
106a94a0 296 evlist__for_each(evsel_list, counter) {
3b3eb044 297 if (read_counter(counter))
245bad8e 298 pr_debug("failed to read counter %s\n", counter->name);
3b3eb044 299
f80010eb 300 if (perf_stat_process_counter(&stat_config, counter))
3b3eb044 301 pr_warning("failed to process counter %s\n", counter->name);
106a94a0 302
5fc472a6 303 if (close_counters) {
106a94a0
JO
304 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
305 thread_map__nr(evsel_list->threads));
13370a9b
SE
306 }
307 }
106a94a0
JO
308}
309
ba411a95 310static void process_interval(void)
106a94a0 311{
106a94a0 312 struct timespec ts, rs;
106a94a0
JO
313
314 read_counters(false);
86ee6e18 315
13370a9b
SE
316 clock_gettime(CLOCK_MONOTONIC, &ts);
317 diff_timespec(&rs, &ts, &ref_time);
13370a9b 318
7aad0c32
JO
319 if (STAT_RECORD) {
320 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL))
321 pr_err("failed to write stat round event\n");
322 }
323
d4f63a47 324 print_counters(&rs, 0, NULL);
13370a9b
SE
325}
326
67ccdecd 327static void enable_counters(void)
41191688 328{
67ccdecd 329 if (initial_delay)
41191688 330 usleep(initial_delay * 1000);
67ccdecd
JO
331
332 /*
333 * We need to enable counters only if:
334 * - we don't have tracee (attaching to task or cpu)
335 * - we have initial delay configured
336 */
337 if (!target__none(&target) || initial_delay)
ab46db0a 338 perf_evlist__enable(evsel_list);
41191688
AK
339}
340
f33cbe72 341static volatile int workload_exec_errno;
6af206fd
ACM
342
343/*
344 * perf_evlist__prepare_workload will send a SIGUSR1
345 * if the fork fails, since we asked by setting its
346 * want_signal to true.
347 */
f33cbe72
ACM
348static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
349 void *ucontext __maybe_unused)
6af206fd 350{
f33cbe72 351 workload_exec_errno = info->si_value.sival_int;
6af206fd
ACM
352}
353
7b60a7e3
JO
354static bool has_unit(struct perf_evsel *counter)
355{
356 return counter->unit && *counter->unit;
357}
358
359static bool has_scale(struct perf_evsel *counter)
360{
361 return counter->scale != 1;
362}
363
664c98d4 364static int perf_stat_synthesize_config(bool is_pipe)
8b99b1a4 365{
7b60a7e3 366 struct perf_evsel *counter;
8b99b1a4
JO
367 int err;
368
664c98d4
JO
369 if (is_pipe) {
370 err = perf_event__synthesize_attrs(NULL, perf_stat.session,
371 process_synthesized_event);
372 if (err < 0) {
373 pr_err("Couldn't synthesize attrs.\n");
374 return err;
375 }
376 }
377
7b60a7e3
JO
378 /*
379 * Synthesize other event attributes not carried within
380 * the attr event - unit, scale, name.
381 */
382 evlist__for_each(evsel_list, counter) {
383 if (!counter->supported)
384 continue;
385
386 /*
387 * Synthesize unit and scale only if it's defined.
388 */
389 if (has_unit(counter)) {
390 err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event);
391 if (err < 0) {
392 pr_err("Couldn't synthesize evsel unit.\n");
393 return err;
394 }
395 }
396
397 if (has_scale(counter)) {
398 err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event);
399 if (err < 0) {
400 pr_err("Couldn't synthesize evsel scale.\n");
401 return err;
402 }
403 }
404
405 if (counter->own_cpus) {
406 err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event);
407 if (err < 0) {
408 pr_err("Couldn't synthesize evsel scale.\n");
409 return err;
410 }
411 }
412
413 /*
414 * Name is needed only for pipe output,
415 * perf.data carries event names.
416 */
417 if (is_pipe) {
418 err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event);
419 if (err < 0) {
420 pr_err("Couldn't synthesize evsel name.\n");
421 return err;
422 }
423 }
424 }
425
8b99b1a4
JO
426 err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
427 process_synthesized_event,
428 NULL);
429 if (err < 0) {
430 pr_err("Couldn't synthesize thread map.\n");
431 return err;
432 }
433
434 err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
435 process_synthesized_event, NULL);
436 if (err < 0) {
437 pr_err("Couldn't synthesize thread map.\n");
438 return err;
439 }
440
441 err = perf_event__synthesize_stat_config(NULL, &stat_config,
442 process_synthesized_event, NULL);
443 if (err < 0) {
444 pr_err("Couldn't synthesize config.\n");
445 return err;
446 }
447
448 return 0;
449}
450
2af4646d
JO
451#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
452
453static int __store_counter_ids(struct perf_evsel *counter,
454 struct cpu_map *cpus,
455 struct thread_map *threads)
456{
457 int cpu, thread;
458
459 for (cpu = 0; cpu < cpus->nr; cpu++) {
460 for (thread = 0; thread < threads->nr; thread++) {
461 int fd = FD(counter, cpu, thread);
462
463 if (perf_evlist__id_add_fd(evsel_list, counter,
464 cpu, thread, fd) < 0)
465 return -1;
466 }
467 }
468
469 return 0;
470}
471
472static int store_counter_ids(struct perf_evsel *counter)
473{
474 struct cpu_map *cpus = counter->cpus;
475 struct thread_map *threads = counter->threads;
476
477 if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
478 return -ENOMEM;
479
480 return __store_counter_ids(counter, cpus, threads);
481}
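/*
 * Editorial note (assumption about the helpers used above): FD(counter,
 * cpu, thread) resolves to the file descriptor stored when the counter
 * was opened, and perf_evlist__id_add_fd() associates its kernel event
 * ID with the evsel so that perf_evsel__write_stat_event() can fill in
 * sid->id for the synthesized stat records.
 */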
482
acf28922 483static int __run_perf_stat(int argc, const char **argv)
42202dd5 484{
ec0d3d1f 485 int interval = stat_config.interval;
56e52e85 486 char msg[512];
42202dd5 487 unsigned long long t0, t1;
cac21425 488 struct perf_evsel *counter;
13370a9b 489 struct timespec ts;
410136f5 490 size_t l;
42202dd5 491 int status = 0;
6be2850e 492 const bool forks = (argc > 0);
664c98d4 493 bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;
42202dd5 494
13370a9b
SE
495 if (interval) {
496 ts.tv_sec = interval / 1000;
497 ts.tv_nsec = (interval % 1000) * 1000000;
498 } else {
499 ts.tv_sec = 1;
500 ts.tv_nsec = 0;
501 }
502
60666c63 503 if (forks) {
664c98d4 504 if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
735f7e0b 505 workload_exec_failed_signal) < 0) {
acf28922
NK
506 perror("failed to prepare workload");
507 return -1;
60666c63 508 }
d20a47e7 509 child_pid = evsel_list->workload.pid;
051ae7f7
PM
510 }
511
6a4bb04c 512 if (group)
63dab225 513 perf_evlist__set_leader(evsel_list);
6a4bb04c 514
0050f7aa 515 evlist__for_each(evsel_list, counter) {
cac21425 516 if (create_perf_stat_counter(counter) < 0) {
979987a5
DA
517 /*
518 * PPC returns ENXIO for HW counters until 2.6.37
519 * (behavior changed with commit b0a873e).
520 */
38f6ae1e 521 if (errno == EINVAL || errno == ENOSYS ||
979987a5
DA
522 errno == ENOENT || errno == EOPNOTSUPP ||
523 errno == ENXIO) {
c63ca0c0
DA
524 if (verbose)
525 ui__warning("%s event is not supported by the kernel.\n",
7289f83c 526 perf_evsel__name(counter));
2cee77c4 527 counter->supported = false;
cb5ef600
KL
528
529 if ((counter->leader != counter) ||
530 !(counter->leader->nr_members > 1))
531 continue;
c63ca0c0 532 }
ede70290 533
56e52e85
ACM
534 perf_evsel__open_strerror(counter, &target,
535 errno, msg, sizeof(msg));
536 ui__error("%s\n", msg);
537
48290609
ACM
538 if (child_pid != -1)
539 kill(child_pid, SIGTERM);
fceda7fe 540
48290609
ACM
541 return -1;
542 }
2cee77c4 543 counter->supported = true;
410136f5
SE
544
545 l = strlen(counter->unit);
546 if (l > unit_width)
547 unit_width = l;
2af4646d
JO
548
549 if (STAT_RECORD && store_counter_ids(counter))
550 return -1;
084ab9f8 551 }
42202dd5 552
23d4aad4
ACM
553 if (perf_evlist__apply_filters(evsel_list, &counter)) {
554 error("failed to set filter \"%s\" on event %s with %d (%s)\n",
555 counter->filter, perf_evsel__name(counter), errno,
759e612b 556 strerror_r(errno, msg, sizeof(msg)));
cfd748ae
FW
557 return -1;
558 }
559
4979d0c7
JO
560 if (STAT_RECORD) {
561 int err, fd = perf_data_file__fd(&perf_stat.file);
562
664c98d4
JO
563 if (is_pipe) {
564 err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file));
565 } else {
566 err = perf_session__write_header(perf_stat.session, evsel_list,
567 fd, false);
568 }
569
4979d0c7
JO
570 if (err < 0)
571 return err;
8b99b1a4 572
664c98d4 573 err = perf_stat_synthesize_config(is_pipe);
8b99b1a4
JO
574 if (err < 0)
575 return err;
4979d0c7
JO
576 }
577
42202dd5
IM
578 /*
579 * Enable counters and exec the command:
580 */
581 t0 = rdclock();
13370a9b 582 clock_gettime(CLOCK_MONOTONIC, &ref_time);
42202dd5 583
60666c63 584 if (forks) {
acf28922 585 perf_evlist__start_workload(evsel_list);
67ccdecd 586 enable_counters();
acf28922 587
13370a9b
SE
588 if (interval) {
589 while (!waitpid(child_pid, &status, WNOHANG)) {
590 nanosleep(&ts, NULL);
ba411a95 591 process_interval();
13370a9b
SE
592 }
593 }
60666c63 594 wait(&status);
6af206fd 595
f33cbe72
ACM
596 if (workload_exec_errno) {
597 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
598 pr_err("Workload failed: %s\n", emsg);
6af206fd 599 return -1;
f33cbe72 600 }
6af206fd 601
33e49ea7
AK
602 if (WIFSIGNALED(status))
603 psignal(WTERMSIG(status), argv[0]);
60666c63 604 } else {
67ccdecd 605 enable_counters();
13370a9b
SE
606 while (!done) {
607 nanosleep(&ts, NULL);
608 if (interval)
ba411a95 609 process_interval();
13370a9b 610 }
60666c63 611 }
42202dd5 612
42202dd5
IM
613 t1 = rdclock();
614
9e9772c4 615 update_stats(&walltime_nsecs_stats, t1 - t0);
42202dd5 616
106a94a0 617 read_counters(true);
c52b12ed 618
42202dd5
IM
619 return WEXITSTATUS(status);
620}
621
41cde476 622static int run_perf_stat(int argc, const char **argv)
1f16c575
PZ
623{
624 int ret;
625
626 if (pre_cmd) {
627 ret = system(pre_cmd);
628 if (ret)
629 return ret;
630 }
631
632 if (sync_run)
633 sync();
634
635 ret = __run_perf_stat(argc, argv);
636 if (ret)
637 return ret;
638
639 if (post_cmd) {
640 ret = system(post_cmd);
641 if (ret)
642 return ret;
643 }
644
645 return ret;
646}
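/*
 * Editorial illustration (hypothetical command line, script names made
 * up): the --pre/--post hooks and -S wrap each measured run, e.g.
 *
 *   perf stat -r 3 -S --pre ./prepare.sh --post ./cleanup.sh -- ./workload
 *
 * runs system(pre_cmd), sync(), __run_perf_stat() and system(post_cmd)
 * once per repetition requested with -r.
 */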
647
d73515c0
AK
648static void print_running(u64 run, u64 ena)
649{
650 if (csv_output) {
5821522e 651 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
d73515c0
AK
652 csv_sep,
653 run,
654 csv_sep,
655 ena ? 100.0 * run / ena : 100.0);
656 } else if (run != ena) {
5821522e 657 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena);
d73515c0
AK
658 }
659}
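/*
 * Editorial illustration of the two output shapes above: with -x the
 * counter line gains "<sep>run<sep>percent", e.g. ",1234567,87.50";
 * otherwise a trailing " (87.50%)" is appended only when the counter
 * was multiplexed (run != ena).
 */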
660
f99844cb
IM
661static void print_noise_pct(double total, double avg)
662{
0007ecea 663 double pct = rel_stddev_stats(total, avg);
f99844cb 664
3ae9a34d 665 if (csv_output)
5821522e 666 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
a1bca6cc 667 else if (pct)
5821522e 668 fprintf(stat_config.output, " ( +-%6.2f%% )", pct);
f99844cb
IM
669}
670
69aad6f1 671static void print_noise(struct perf_evsel *evsel, double avg)
42202dd5 672{
581cc8a2 673 struct perf_stat_evsel *ps;
69aad6f1 674
849abde9
PZ
675 if (run_count == 1)
676 return;
677
69aad6f1 678 ps = evsel->priv;
f99844cb 679 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
42202dd5
IM
680}
681
12c08a9f 682static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
44175b6f 683{
421a50f3 684 switch (stat_config.aggr_mode) {
12c08a9f 685 case AGGR_CORE:
5821522e 686 fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
12c08a9f
SE
687 cpu_map__id_to_socket(id),
688 csv_output ? 0 : -8,
689 cpu_map__id_to_cpu(id),
690 csv_sep,
691 csv_output ? 0 : 4,
692 nr,
693 csv_sep);
694 break;
86ee6e18 695 case AGGR_SOCKET:
5821522e 696 fprintf(stat_config.output, "S%*d%s%*d%s",
d7e7a451 697 csv_output ? 0 : -5,
12c08a9f 698 id,
d7e7a451
SE
699 csv_sep,
700 csv_output ? 0 : 4,
701 nr,
702 csv_sep);
86ee6e18
SE
703 break;
704 case AGGR_NONE:
5821522e 705 fprintf(stat_config.output, "CPU%*d%s",
d7470b6a 706 csv_output ? 0 : -4,
12c08a9f 707 perf_evsel__cpus(evsel)->map[id], csv_sep);
86ee6e18 708 break;
32b8af82 709 case AGGR_THREAD:
5821522e 710 fprintf(stat_config.output, "%*s-%*d%s",
32b8af82
JO
711 csv_output ? 0 : 16,
712 thread_map__comm(evsel->threads, id),
713 csv_output ? 0 : -8,
714 thread_map__pid(evsel->threads, id),
715 csv_sep);
716 break;
86ee6e18 717 case AGGR_GLOBAL:
208df99e 718 case AGGR_UNSET:
86ee6e18
SE
719 default:
720 break;
721 }
722}
723
da88c7f7 724static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
86ee6e18 725{
5821522e 726 FILE *output = stat_config.output;
86ee6e18 727 double msecs = avg / 1e6;
410136f5 728 const char *fmt_v, *fmt_n;
4bbe5a61 729 char name[25];
86ee6e18 730
410136f5
SE
731 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
732 fmt_n = csv_output ? "%s" : "%-25s";
733
da88c7f7 734 aggr_printout(evsel, id, nr);
d7470b6a 735
4bbe5a61
DA
736 scnprintf(name, sizeof(name), "%s%s",
737 perf_evsel__name(evsel), csv_output ? "" : " (msec)");
410136f5
SE
738
739 fprintf(output, fmt_v, msecs, csv_sep);
740
741 if (csv_output)
742 fprintf(output, "%s%s", evsel->unit, csv_sep);
743 else
744 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);
745
746 fprintf(output, fmt_n, name);
d7470b6a 747
023695d9 748 if (evsel->cgrp)
4aa9015f 749 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
44175b6f
IM
750}
751
556b1fb7
JO
752static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
753{
5821522e 754 FILE *output = stat_config.output;
556b1fb7
JO
755 double sc = evsel->scale;
756 const char *fmt;
556b1fb7
JO
757
758 if (csv_output) {
759 fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s";
760 } else {
761 if (big_num)
762 fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s";
763 else
764 fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s";
765 }
766
767 aggr_printout(evsel, id, nr);
768
556b1fb7
JO
769 fprintf(output, fmt, avg, csv_sep);
770
771 if (evsel->unit)
772 fprintf(output, "%-*s%s",
773 csv_output ? 0 : unit_width,
774 evsel->unit, csv_sep);
775
776 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
777
778 if (evsel->cgrp)
779 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
eedfcb4b 780}
556b1fb7 781
eedfcb4b
AK
782static void printout(int id, int nr, struct perf_evsel *counter, double uval)
783{
784 int cpu = cpu_map__id_to_cpu(id);
785
786 if (stat_config.aggr_mode == AGGR_GLOBAL)
787 cpu = 0;
788
789 if (nsec_counter(counter))
790 nsec_printout(id, nr, counter, uval);
791 else
792 abs_printout(id, nr, counter, uval);
556b1fb7 793
eedfcb4b
AK
794 if (!csv_output && !stat_config.interval)
795 perf_stat__print_shadow_stats(stat_config.output, counter,
796 uval, cpu,
797 stat_config.aggr_mode);
556b1fb7
JO
798}
799
86ee6e18 800static void print_aggr(char *prefix)
d7e7a451 801{
5821522e 802 FILE *output = stat_config.output;
d7e7a451 803 struct perf_evsel *counter;
601083cf 804 int cpu, s, s2, id, nr;
410136f5 805 double uval;
d7e7a451 806 u64 ena, run, val;
d7e7a451 807
86ee6e18 808 if (!(aggr_map || aggr_get_id))
d7e7a451
SE
809 return;
810
86ee6e18
SE
811 for (s = 0; s < aggr_map->nr; s++) {
812 id = aggr_map->map[s];
0050f7aa 813 evlist__for_each(evsel_list, counter) {
d7e7a451
SE
814 val = ena = run = 0;
815 nr = 0;
816 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
601083cf 817 s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
86ee6e18 818 if (s2 != id)
d7e7a451 819 continue;
a6fa0038
JO
820 val += perf_counts(counter->counts, cpu, 0)->val;
821 ena += perf_counts(counter->counts, cpu, 0)->ena;
822 run += perf_counts(counter->counts, cpu, 0)->run;
d7e7a451
SE
823 nr++;
824 }
825 if (prefix)
826 fprintf(output, "%s", prefix);
827
828 if (run == 0 || ena == 0) {
582ec082 829 aggr_printout(counter, id, nr);
86ee6e18 830
410136f5 831 fprintf(output, "%*s%s",
d7e7a451
SE
832 csv_output ? 0 : 18,
833 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
410136f5
SE
834 csv_sep);
835
836 fprintf(output, "%-*s%s",
837 csv_output ? 0 : unit_width,
838 counter->unit, csv_sep);
839
840 fprintf(output, "%*s",
841 csv_output ? 0 : -25,
d7e7a451 842 perf_evsel__name(counter));
86ee6e18 843
d7e7a451
SE
844 if (counter->cgrp)
845 fprintf(output, "%s%s",
846 csv_sep, counter->cgrp->name);
847
d73515c0 848 print_running(run, ena);
d7e7a451
SE
849 fputc('\n', output);
850 continue;
851 }
410136f5 852 uval = val * counter->scale;
eedfcb4b 853 printout(id, nr, counter, uval);
d73515c0 854 if (!csv_output)
d7e7a451
SE
855 print_noise(counter, 1.0);
856
d73515c0 857 print_running(run, ena);
d7e7a451
SE
858 fputc('\n', output);
859 }
860 }
861}
862
32b8af82
JO
863static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
864{
5821522e 865 FILE *output = stat_config.output;
32b8af82
JO
866 int nthreads = thread_map__nr(counter->threads);
867 int ncpus = cpu_map__nr(counter->cpus);
868 int cpu, thread;
869 double uval;
870
871 for (thread = 0; thread < nthreads; thread++) {
872 u64 ena = 0, run = 0, val = 0;
873
874 for (cpu = 0; cpu < ncpus; cpu++) {
875 val += perf_counts(counter->counts, cpu, thread)->val;
876 ena += perf_counts(counter->counts, cpu, thread)->ena;
877 run += perf_counts(counter->counts, cpu, thread)->run;
878 }
879
880 if (prefix)
881 fprintf(output, "%s", prefix);
882
883 uval = val * counter->scale;
eedfcb4b 884 printout(thread, 0, counter, uval);
32b8af82
JO
885
886 if (!csv_output)
887 print_noise(counter, 1.0);
888
889 print_running(run, ena);
890 fputc('\n', output);
891 }
892}
893
2996f5dd
IM
894/*
895 * Print out the results of a single counter:
f5b4a9c3 896 * aggregated counts in system-wide mode
2996f5dd 897 */
13370a9b 898static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
2996f5dd 899{
5821522e 900 FILE *output = stat_config.output;
581cc8a2 901 struct perf_stat_evsel *ps = counter->priv;
69aad6f1 902 double avg = avg_stats(&ps->res_stats[0]);
c52b12ed 903 int scaled = counter->counts->scaled;
410136f5 904 double uval;
d73515c0
AK
905 double avg_enabled, avg_running;
906
907 avg_enabled = avg_stats(&ps->res_stats[1]);
908 avg_running = avg_stats(&ps->res_stats[2]);
2996f5dd 909
13370a9b
SE
910 if (prefix)
911 fprintf(output, "%s", prefix);
912
3b4331d9 913 if (scaled == -1 || !counter->supported) {
410136f5 914 fprintf(output, "%*s%s",
d7470b6a 915 csv_output ? 0 : 18,
2cee77c4 916 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
410136f5
SE
917 csv_sep);
918 fprintf(output, "%-*s%s",
919 csv_output ? 0 : unit_width,
920 counter->unit, csv_sep);
921 fprintf(output, "%*s",
922 csv_output ? 0 : -25,
7289f83c 923 perf_evsel__name(counter));
023695d9
SE
924
925 if (counter->cgrp)
4aa9015f 926 fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
023695d9 927
d73515c0 928 print_running(avg_running, avg_enabled);
4aa9015f 929 fputc('\n', output);
2996f5dd
IM
930 return;
931 }
c04f5e5d 932
410136f5 933 uval = avg * counter->scale;
eedfcb4b 934 printout(-1, 0, counter, uval);
849abde9 935
3ae9a34d
ZH
936 print_noise(counter, avg);
937
d73515c0 938 print_running(avg_running, avg_enabled);
4aa9015f 939 fprintf(output, "\n");
c04f5e5d
IM
940}
941
f5b4a9c3
SE
942/*
943 * Print out the results of a single counter:
944 * does not use aggregated count in system-wide
945 */
13370a9b 946static void print_counter(struct perf_evsel *counter, char *prefix)
f5b4a9c3 947{
5821522e 948 FILE *output = stat_config.output;
f5b4a9c3 949 u64 ena, run, val;
410136f5 950 double uval;
f5b4a9c3
SE
951 int cpu;
952
7ae92e74 953 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
a6fa0038
JO
954 val = perf_counts(counter->counts, cpu, 0)->val;
955 ena = perf_counts(counter->counts, cpu, 0)->ena;
956 run = perf_counts(counter->counts, cpu, 0)->run;
13370a9b
SE
957
958 if (prefix)
959 fprintf(output, "%s", prefix);
960
f5b4a9c3 961 if (run == 0 || ena == 0) {
410136f5 962 fprintf(output, "CPU%*d%s%*s%s",
d7470b6a 963 csv_output ? 0 : -4,
7ae92e74 964 perf_evsel__cpus(counter)->map[cpu], csv_sep,
d7470b6a 965 csv_output ? 0 : 18,
2cee77c4 966 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
410136f5
SE
967 csv_sep);
968
969 fprintf(output, "%-*s%s",
970 csv_output ? 0 : unit_width,
971 counter->unit, csv_sep);
972
973 fprintf(output, "%*s",
974 csv_output ? 0 : -25,
975 perf_evsel__name(counter));
f5b4a9c3 976
023695d9 977 if (counter->cgrp)
4aa9015f
SE
978 fprintf(output, "%s%s",
979 csv_sep, counter->cgrp->name);
023695d9 980
d73515c0 981 print_running(run, ena);
4aa9015f 982 fputc('\n', output);
f5b4a9c3
SE
983 continue;
984 }
985
410136f5 986 uval = val * counter->scale;
eedfcb4b 987 printout(cpu, 0, counter, uval);
d73515c0 988 if (!csv_output)
d7470b6a 989 print_noise(counter, 1.0);
d73515c0 990 print_running(run, ena);
f5b4a9c3 991
4aa9015f 992 fputc('\n', output);
f5b4a9c3
SE
993 }
994}
995
d4f63a47
JO
996static void print_interval(char *prefix, struct timespec *ts)
997{
5821522e 998 FILE *output = stat_config.output;
d4f63a47
JO
999 static int num_print_interval;
1000
1001 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
1002
1003 if (num_print_interval == 0 && !csv_output) {
421a50f3 1004 switch (stat_config.aggr_mode) {
d4f63a47
JO
1005 case AGGR_SOCKET:
1006 fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit");
1007 break;
1008 case AGGR_CORE:
1009 fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit");
1010 break;
1011 case AGGR_NONE:
1012 fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit");
1013 break;
32b8af82
JO
1014 case AGGR_THREAD:
1015 fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit");
1016 break;
d4f63a47
JO
1017 case AGGR_GLOBAL:
1018 default:
1019 fprintf(output, "# time counts %*s events\n", unit_width, "unit");
208df99e
JO
1020 case AGGR_UNSET:
1021 break;
d4f63a47
JO
1022 }
1023 }
1024
1025 if (++num_print_interval == 25)
1026 num_print_interval = 0;
1027}
1028
1029static void print_header(int argc, const char **argv)
42202dd5 1030{
5821522e 1031 FILE *output = stat_config.output;
69aad6f1 1032 int i;
42202dd5 1033
ddcacfa0
IM
1034 fflush(stdout);
1035
d7470b6a 1036 if (!csv_output) {
4aa9015f
SE
1037 fprintf(output, "\n");
1038 fprintf(output, " Performance counter stats for ");
62d3b617
DA
1039 if (target.system_wide)
1040 fprintf(output, "\'system wide");
1041 else if (target.cpu_list)
1042 fprintf(output, "\'CPU(s) %s", target.cpu_list);
602ad878 1043 else if (!target__has_task(&target)) {
4aa9015f 1044 fprintf(output, "\'%s", argv[0]);
d7470b6a 1045 for (i = 1; i < argc; i++)
4aa9015f 1046 fprintf(output, " %s", argv[i]);
20f946b4
NK
1047 } else if (target.pid)
1048 fprintf(output, "process id \'%s", target.pid);
d7470b6a 1049 else
20f946b4 1050 fprintf(output, "thread id \'%s", target.tid);
44db76c8 1051
4aa9015f 1052 fprintf(output, "\'");
d7470b6a 1053 if (run_count > 1)
4aa9015f
SE
1054 fprintf(output, " (%d runs)", run_count);
1055 fprintf(output, ":\n\n");
d7470b6a 1056 }
d4f63a47
JO
1057}
1058
1059static void print_footer(void)
1060{
5821522e
JO
1061 FILE *output = stat_config.output;
1062
d4f63a47
JO
1063 if (!null_run)
1064 fprintf(output, "\n");
1065 fprintf(output, " %17.9f seconds time elapsed",
1066 avg_stats(&walltime_nsecs_stats)/1e9);
1067 if (run_count > 1) {
1068 fprintf(output, " ");
1069 print_noise_pct(stddev_stats(&walltime_nsecs_stats),
1070 avg_stats(&walltime_nsecs_stats));
1071 }
1072 fprintf(output, "\n\n");
1073}
1074
1075static void print_counters(struct timespec *ts, int argc, const char **argv)
1076{
ec0d3d1f 1077 int interval = stat_config.interval;
d4f63a47
JO
1078 struct perf_evsel *counter;
1079 char buf[64], *prefix = NULL;
1080
664c98d4
JO
1081 /* Do not print anything if we record to the pipe. */
1082 if (STAT_RECORD && perf_stat.file.is_pipe)
1083 return;
1084
d4f63a47
JO
1085 if (interval)
1086 print_interval(prefix = buf, ts);
1087 else
1088 print_header(argc, argv);
2996f5dd 1089
421a50f3 1090 switch (stat_config.aggr_mode) {
12c08a9f 1091 case AGGR_CORE:
86ee6e18 1092 case AGGR_SOCKET:
d4f63a47 1093 print_aggr(prefix);
86ee6e18 1094 break;
32b8af82
JO
1095 case AGGR_THREAD:
1096 evlist__for_each(evsel_list, counter)
1097 print_aggr_thread(counter, prefix);
1098 break;
86ee6e18 1099 case AGGR_GLOBAL:
0050f7aa 1100 evlist__for_each(evsel_list, counter)
d4f63a47 1101 print_counter_aggr(counter, prefix);
86ee6e18
SE
1102 break;
1103 case AGGR_NONE:
0050f7aa 1104 evlist__for_each(evsel_list, counter)
d4f63a47 1105 print_counter(counter, prefix);
86ee6e18 1106 break;
208df99e 1107 case AGGR_UNSET:
86ee6e18
SE
1108 default:
1109 break;
f5b4a9c3 1110 }
ddcacfa0 1111
d4f63a47
JO
1112 if (!interval && !csv_output)
1113 print_footer();
1114
5821522e 1115 fflush(stat_config.output);
ddcacfa0
IM
1116}
1117
f7b7c26e
PZ
1118static volatile int signr = -1;
1119
5242519b 1120static void skip_signal(int signo)
ddcacfa0 1121{
ec0d3d1f 1122 if ((child_pid == -1) || stat_config.interval)
60666c63
LW
1123 done = 1;
1124
f7b7c26e 1125 signr = signo;
d07f0b12
SE
1126 /*
1127 * render child_pid harmless so that we
1128 * won't send SIGTERM to a random process
1129 * in case of a race condition and fast
1130 * PID recycling
1131 */
1132 child_pid = -1;
f7b7c26e
PZ
1133}
1134
1135static void sig_atexit(void)
1136{
d07f0b12
SE
1137 sigset_t set, oset;
1138
1139 /*
1140 * avoid race condition with SIGCHLD handler
1141 * in skip_signal() which is modifying child_pid
1142 * the goal is to avoid sending SIGTERM to a
1143 * random process
1144 */
1145 sigemptyset(&set);
1146 sigaddset(&set, SIGCHLD);
1147 sigprocmask(SIG_BLOCK, &set, &oset);
1148
933da83a
CW
1149 if (child_pid != -1)
1150 kill(child_pid, SIGTERM);
1151
d07f0b12
SE
1152 sigprocmask(SIG_SETMASK, &oset, NULL);
1153
f7b7c26e
PZ
1154 if (signr == -1)
1155 return;
1156
1157 signal(signr, SIG_DFL);
1158 kill(getpid(), signr);
5242519b
IM
1159}
1160
1d037ca1
IT
1161static int stat__set_big_num(const struct option *opt __maybe_unused,
1162 const char *s __maybe_unused, int unset)
d7470b6a
SE
1163{
1164 big_num_opt = unset ? 0 : 1;
1165 return 0;
1166}
1167
e0547311
JO
1168static const struct option stat_options[] = {
1169 OPT_BOOLEAN('T', "transaction", &transaction_run,
1170 "hardware transaction statistics"),
1171 OPT_CALLBACK('e', "event", &evsel_list, "event",
1172 "event selector. use 'perf list' to list available events",
1173 parse_events_option),
1174 OPT_CALLBACK(0, "filter", &evsel_list, "filter",
1175 "event filter", parse_filter),
1176 OPT_BOOLEAN('i', "no-inherit", &no_inherit,
1177 "child tasks do not inherit counters"),
1178 OPT_STRING('p', "pid", &target.pid, "pid",
1179 "stat events on existing process id"),
1180 OPT_STRING('t', "tid", &target.tid, "tid",
1181 "stat events on existing thread id"),
1182 OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
1183 "system-wide collection from all CPUs"),
1184 OPT_BOOLEAN('g', "group", &group,
1185 "put the counters into a counter group"),
1186 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
1187 OPT_INCR('v', "verbose", &verbose,
1188 "be more verbose (show counter open errors, etc)"),
1189 OPT_INTEGER('r', "repeat", &run_count,
1190 "repeat command and print average + stddev (max: 100, forever: 0)"),
1191 OPT_BOOLEAN('n', "null", &null_run,
1192 "null run - dont start any counters"),
1193 OPT_INCR('d', "detailed", &detailed_run,
1194 "detailed run - start a lot of events"),
1195 OPT_BOOLEAN('S', "sync", &sync_run,
1196 "call sync() before starting a run"),
1197 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
1198 "print large numbers with thousands\' separators",
1199 stat__set_big_num),
1200 OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1201 "list of cpus to monitor in system-wide"),
1202 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
1203 "disable CPU count aggregation", AGGR_NONE),
1204 OPT_STRING('x', "field-separator", &csv_sep, "separator",
1205 "print counts with custom separator"),
1206 OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
1207 "monitor event in cgroup name only", parse_cgroups),
1208 OPT_STRING('o', "output", &output_name, "file", "output file name"),
1209 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
1210 OPT_INTEGER(0, "log-fd", &output_fd,
1211 "log output to fd, instead of stderr"),
1212 OPT_STRING(0, "pre", &pre_cmd, "command",
1213 "command to run prior to the measured command"),
1214 OPT_STRING(0, "post", &post_cmd, "command",
1215 "command to run after to the measured command"),
1216 OPT_UINTEGER('I', "interval-print", &stat_config.interval,
1217 "print counts at regular interval in ms (>= 10)"),
1218 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
1219 "aggregate counts per processor socket", AGGR_SOCKET),
1220 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
1221 "aggregate counts per physical processor core", AGGR_CORE),
1222 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
1223 "aggregate counts per thread", AGGR_THREAD),
1224 OPT_UINTEGER('D', "delay", &initial_delay,
1225 "ms to wait before starting measurement after program start"),
1226 OPT_END()
1227};
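/*
 * Editorial illustration (hypothetical invocations) of the options
 * declared above:
 *
 *   perf stat -e cycles,instructions -r 5 -- ./hackbench 10
 *   perf stat -a --per-socket -I 1000
 *   perf stat record -o stat.data -- ./workload
 *
 * The "record" form is routed through __cmd_record() further below.
 */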
1228
1fe7a300
JO
1229static int perf_stat__get_socket(struct cpu_map *map, int cpu)
1230{
1231 return cpu_map__get_socket(map, cpu, NULL);
1232}
1233
1234static int perf_stat__get_core(struct cpu_map *map, int cpu)
1235{
1236 return cpu_map__get_core(map, cpu, NULL);
1237}
1238
1e5a2931
JO
1239static int cpu_map__get_max(struct cpu_map *map)
1240{
1241 int i, max = -1;
1242
1243 for (i = 0; i < map->nr; i++) {
1244 if (map->map[i] > max)
1245 max = map->map[i];
1246 }
1247
1248 return max;
1249}
1250
1251static struct cpu_map *cpus_aggr_map;
1252
1253static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
1254{
1255 int cpu;
1256
1257 if (idx >= map->nr)
1258 return -1;
1259
1260 cpu = map->map[idx];
1261
1262 if (cpus_aggr_map->map[cpu] == -1)
1263 cpus_aggr_map->map[cpu] = get_id(map, idx);
1264
1265 return cpus_aggr_map->map[cpu];
1266}
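/*
 * Editorial note (summary of the caching above): the first lookup for a
 * given cpu stores the socket/core id returned by get_id() in
 * cpus_aggr_map->map[cpu]; later lookups for the same cpu are answered
 * from that cache by the *_cached() wrappers below.
 */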
1267
1268static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
1269{
1270 return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
1271}
1272
1273static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
1274{
1275 return perf_stat__get_aggr(perf_stat__get_core, map, idx);
1276}
1277
86ee6e18
SE
1278static int perf_stat_init_aggr_mode(void)
1279{
1e5a2931
JO
1280 int nr;
1281
421a50f3 1282 switch (stat_config.aggr_mode) {
86ee6e18
SE
1283 case AGGR_SOCKET:
1284 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
1285 perror("cannot build socket map");
1286 return -1;
1287 }
1e5a2931 1288 aggr_get_id = perf_stat__get_socket_cached;
86ee6e18 1289 break;
12c08a9f
SE
1290 case AGGR_CORE:
1291 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
1292 perror("cannot build core map");
1293 return -1;
1294 }
1e5a2931 1295 aggr_get_id = perf_stat__get_core_cached;
12c08a9f 1296 break;
86ee6e18
SE
1297 case AGGR_NONE:
1298 case AGGR_GLOBAL:
32b8af82 1299 case AGGR_THREAD:
208df99e 1300 case AGGR_UNSET:
86ee6e18
SE
1301 default:
1302 break;
1303 }
1e5a2931
JO
1304
1305 /*
1306 * The evsel_list->cpus is the base we operate on,
1307 * taking the highest cpu number as the size of
1308 * the aggregation translation cpumap.
1309 */
1310 nr = cpu_map__get_max(evsel_list->cpus);
1311 cpus_aggr_map = cpu_map__empty_new(nr + 1);
1312 return cpus_aggr_map ? 0 : -ENOMEM;
86ee6e18
SE
1313}
1314
544c2ae7
MH
1315static void perf_stat__exit_aggr_mode(void)
1316{
1317 cpu_map__put(aggr_map);
1318 cpu_map__put(cpus_aggr_map);
1319 aggr_map = NULL;
1320 cpus_aggr_map = NULL;
1321}
1322
2cba3ffb
IM
1323/*
1324 * Add default attributes, if there were no attributes specified or
1325 * if -d/--detailed, -d -d or -d -d -d is used:
1326 */
1327static int add_default_attributes(void)
1328{
b070a547
ACM
1329 struct perf_event_attr default_attrs[] = {
1330
1331 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
1332 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
1333 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
1334 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
1335
1336 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
1337 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
1338 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
1339 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
1340 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
1341 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
1342
1343};
1344
1345/*
1346 * Detailed stats (-d), covering the L1 and last level data caches:
1347 */
1348 struct perf_event_attr detailed_attrs[] = {
1349
1350 { .type = PERF_TYPE_HW_CACHE,
1351 .config =
1352 PERF_COUNT_HW_CACHE_L1D << 0 |
1353 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1354 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
1355
1356 { .type = PERF_TYPE_HW_CACHE,
1357 .config =
1358 PERF_COUNT_HW_CACHE_L1D << 0 |
1359 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1360 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
1361
1362 { .type = PERF_TYPE_HW_CACHE,
1363 .config =
1364 PERF_COUNT_HW_CACHE_LL << 0 |
1365 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1366 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
1367
1368 { .type = PERF_TYPE_HW_CACHE,
1369 .config =
1370 PERF_COUNT_HW_CACHE_LL << 0 |
1371 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1372 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
1373};
1374
1375/*
1376 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
1377 */
1378 struct perf_event_attr very_detailed_attrs[] = {
1379
1380 { .type = PERF_TYPE_HW_CACHE,
1381 .config =
1382 PERF_COUNT_HW_CACHE_L1I << 0 |
1383 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1384 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
1385
1386 { .type = PERF_TYPE_HW_CACHE,
1387 .config =
1388 PERF_COUNT_HW_CACHE_L1I << 0 |
1389 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1390 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
1391
1392 { .type = PERF_TYPE_HW_CACHE,
1393 .config =
1394 PERF_COUNT_HW_CACHE_DTLB << 0 |
1395 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1396 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
1397
1398 { .type = PERF_TYPE_HW_CACHE,
1399 .config =
1400 PERF_COUNT_HW_CACHE_DTLB << 0 |
1401 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1402 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
1403
1404 { .type = PERF_TYPE_HW_CACHE,
1405 .config =
1406 PERF_COUNT_HW_CACHE_ITLB << 0 |
1407 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1408 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
1409
1410 { .type = PERF_TYPE_HW_CACHE,
1411 .config =
1412 PERF_COUNT_HW_CACHE_ITLB << 0 |
1413 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1414 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
1415
1416};
1417
1418/*
1419 * Very, very detailed stats (-d -d -d), adding prefetch events:
1420 */
1421 struct perf_event_attr very_very_detailed_attrs[] = {
1422
1423 { .type = PERF_TYPE_HW_CACHE,
1424 .config =
1425 PERF_COUNT_HW_CACHE_L1D << 0 |
1426 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
1427 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
1428
1429 { .type = PERF_TYPE_HW_CACHE,
1430 .config =
1431 PERF_COUNT_HW_CACHE_L1D << 0 |
1432 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
1433 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
1434};
1435
2cba3ffb
IM
1436 /* Set attrs if no event is selected and !null_run: */
1437 if (null_run)
1438 return 0;
1439
4cabc3d1
AK
1440 if (transaction_run) {
1441 int err;
1442 if (pmu_have_event("cpu", "cycles-ct") &&
1443 pmu_have_event("cpu", "el-start"))
a454742c 1444 err = parse_events(evsel_list, transaction_attrs, NULL);
4cabc3d1 1445 else
a454742c
JO
1446 err = parse_events(evsel_list, transaction_limited_attrs, NULL);
1447 if (err) {
4cabc3d1
AK
1448 fprintf(stderr, "Cannot set up transaction events\n");
1449 return -1;
1450 }
1451 return 0;
1452 }
1453
2cba3ffb 1454 if (!evsel_list->nr_entries) {
79695e1b 1455 if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
50d08e47 1456 return -1;
2cba3ffb
IM
1457 }
1458
1459 /* Detailed events get appended to the event list: */
1460
1461 if (detailed_run < 1)
1462 return 0;
1463
1464 /* Append detailed run extra attributes: */
79695e1b 1465 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
50d08e47 1466 return -1;
2cba3ffb
IM
1467
1468 if (detailed_run < 2)
1469 return 0;
1470
1471 /* Append very detailed run extra attributes: */
79695e1b 1472 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
50d08e47 1473 return -1;
2cba3ffb
IM
1474
1475 if (detailed_run < 3)
1476 return 0;
1477
1478 /* Append very, very detailed run extra attributes: */
79695e1b 1479 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
2cba3ffb
IM
1480}
1481
4979d0c7
JO
1482static const char * const record_usage[] = {
1483 "perf stat record [<options>]",
1484 NULL,
1485};
1486
3ba78bd0
JO
1487static void init_features(struct perf_session *session)
1488{
1489 int feat;
1490
1491 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1492 perf_header__set_feat(&session->header, feat);
1493
1494 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1495 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1496 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1497 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1498}
1499
4979d0c7
JO
1500static int __cmd_record(int argc, const char **argv)
1501{
1502 struct perf_session *session;
1503 struct perf_data_file *file = &perf_stat.file;
1504
1505 argc = parse_options(argc, argv, stat_options, record_usage,
1506 PARSE_OPT_STOP_AT_NON_OPTION);
1507
1508 if (output_name)
1509 file->path = output_name;
1510
e9d6db8e
JO
1511 if (run_count != 1 || forever) {
1512 pr_err("Cannot use -r option with perf stat record.\n");
1513 return -1;
1514 }
1515
4979d0c7
JO
1516 session = perf_session__new(file, false, NULL);
1517 if (session == NULL) {
1518 pr_err("Perf session creation failed.\n");
1519 return -1;
1520 }
1521
3ba78bd0
JO
1522 init_features(session);
1523
4979d0c7
JO
1524 session->evlist = evsel_list;
1525 perf_stat.session = session;
1526 perf_stat.record = true;
1527 return argc;
1528}
1529
1d037ca1 1530int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
5242519b 1531{
b070a547
ACM
1532 const char * const stat_usage[] = {
1533 "perf stat [<options>] [<command>]",
1534 NULL
1535 };
cc03c542 1536 int status = -EINVAL, run_idx;
4aa9015f 1537 const char *mode;
5821522e 1538 FILE *output = stderr;
ec0d3d1f 1539 unsigned int interval;
4979d0c7 1540 const char * const stat_subcommands[] = { "record" };
42202dd5 1541
5af52b51
SE
1542 setlocale(LC_ALL, "");
1543
334fe7a3 1544 evsel_list = perf_evlist__new();
361c99a6
ACM
1545 if (evsel_list == NULL)
1546 return -ENOMEM;
1547
4979d0c7
JO
1548 argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
1549 (const char **) stat_usage,
1550 PARSE_OPT_STOP_AT_NON_OPTION);
1551
1552 if (argc && !strncmp(argv[0], "rec", 3)) {
1553 argc = __cmd_record(argc, argv);
1554 if (argc < 0)
1555 return -1;
1556 }
d7470b6a 1557
ec0d3d1f
JO
1558 interval = stat_config.interval;
1559
4979d0c7
JO
1560 /*
1561 * For record command the -o is already taken care of.
1562 */
1563 if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
4aa9015f
SE
1564 output = NULL;
1565
56f3bae7
JC
1566 if (output_name && output_fd) {
1567 fprintf(stderr, "cannot use both --output and --log-fd\n");
e0547311
JO
1568 parse_options_usage(stat_usage, stat_options, "o", 1);
1569 parse_options_usage(NULL, stat_options, "log-fd", 0);
cc03c542 1570 goto out;
56f3bae7 1571 }
fc3e4d07
SE
1572
1573 if (output_fd < 0) {
1574 fprintf(stderr, "argument to --log-fd must be a > 0\n");
e0547311 1575 parse_options_usage(stat_usage, stat_options, "log-fd", 0);
cc03c542 1576 goto out;
fc3e4d07
SE
1577 }
1578
4aa9015f
SE
1579 if (!output) {
1580 struct timespec tm;
1581 mode = append_file ? "a" : "w";
1582
1583 output = fopen(output_name, mode);
1584 if (!output) {
1585 perror("failed to create output file");
fceda7fe 1586 return -1;
4aa9015f
SE
1587 }
1588 clock_gettime(CLOCK_REALTIME, &tm);
1589 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
fc3e4d07 1590 } else if (output_fd > 0) {
56f3bae7
JC
1591 mode = append_file ? "a" : "w";
1592 output = fdopen(output_fd, mode);
1593 if (!output) {
1594 perror("Failed opening logfd");
1595 return -errno;
1596 }
4aa9015f
SE
1597 }
1598
5821522e
JO
1599 stat_config.output = output;
1600
d4ffd04d 1601 if (csv_sep) {
d7470b6a 1602 csv_output = true;
d4ffd04d
JC
1603 if (!strcmp(csv_sep, "\\t"))
1604 csv_sep = "\t";
1605 } else
d7470b6a
SE
1606 csv_sep = DEFAULT_SEPARATOR;
1607
1608 /*
1609 * let the spreadsheet do the pretty-printing
1610 */
1611 if (csv_output) {
61a9f324 1612 /* User explicitly passed -B? */
d7470b6a
SE
1613 if (big_num_opt == 1) {
1614 fprintf(stderr, "-B option not supported with -x\n");
e0547311
JO
1615 parse_options_usage(stat_usage, stat_options, "B", 1);
1616 parse_options_usage(NULL, stat_options, "x", 1);
cc03c542 1617 goto out;
d7470b6a
SE
1618 } else /* Nope, so disable big number formatting */
1619 big_num = false;
1620 } else if (big_num_opt == 0) /* User passed --no-big-num */
1621 big_num = false;
1622
602ad878 1623 if (!argc && target__none(&target))
e0547311 1624 usage_with_options(stat_usage, stat_options);
ac3063bd 1625
a7e191c3 1626 if (run_count < 0) {
cc03c542 1627 pr_err("Run count must be a positive number\n");
e0547311 1628 parse_options_usage(stat_usage, stat_options, "r", 1);
cc03c542 1629 goto out;
a7e191c3
FD
1630 } else if (run_count == 0) {
1631 forever = true;
1632 run_count = 1;
1633 }
ddcacfa0 1634
421a50f3 1635 if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
32b8af82
JO
1636 fprintf(stderr, "The --per-thread option is only available "
1637 "when monitoring via -p -t options.\n");
e0547311
JO
1638 parse_options_usage(NULL, stat_options, "p", 1);
1639 parse_options_usage(NULL, stat_options, "t", 1);
32b8af82
JO
1640 goto out;
1641 }
1642
1643 /*
1644 * no_aggr and cgroup are for system-wide only;
1645 * --per-thread is aggregated per thread, we don't mix it with cpu mode
1646 */
421a50f3
JO
1647 if (((stat_config.aggr_mode != AGGR_GLOBAL &&
1648 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
602ad878 1649 !target__has_cpu(&target)) {
023695d9
SE
1650 fprintf(stderr, "both cgroup and no-aggregation "
1651 "modes only available in system-wide mode\n");
1652
e0547311
JO
1653 parse_options_usage(stat_usage, stat_options, "G", 1);
1654 parse_options_usage(NULL, stat_options, "A", 1);
1655 parse_options_usage(NULL, stat_options, "a", 1);
cc03c542 1656 goto out;
d7e7a451
SE
1657 }
1658
2cba3ffb
IM
1659 if (add_default_attributes())
1660 goto out;
ddcacfa0 1661
602ad878 1662 target__validate(&target);
5c98d466 1663
77a6f014 1664 if (perf_evlist__create_maps(evsel_list, &target) < 0) {
602ad878 1665 if (target__has_task(&target)) {
77a6f014 1666 pr_err("Problems finding threads of monitor\n");
e0547311
JO
1667 parse_options_usage(stat_usage, stat_options, "p", 1);
1668 parse_options_usage(NULL, stat_options, "t", 1);
602ad878 1669 } else if (target__has_cpu(&target)) {
77a6f014 1670 perror("failed to parse CPUs map");
e0547311
JO
1671 parse_options_usage(stat_usage, stat_options, "C", 1);
1672 parse_options_usage(NULL, stat_options, "a", 1);
cc03c542
NK
1673 }
1674 goto out;
60d567e2 1675 }
32b8af82
JO
1676
1677 /*
1678 * Initialize thread_map with comm names,
1679 * so we could print it out on output.
1680 */
421a50f3 1681 if (stat_config.aggr_mode == AGGR_THREAD)
32b8af82
JO
1682 thread_map__read_comms(evsel_list->threads);
1683
13370a9b 1684 if (interval && interval < 100) {
19afd104
KL
1685 if (interval < 10) {
1686 pr_err("print interval must be >= 10ms\n");
e0547311 1687 parse_options_usage(stat_usage, stat_options, "I", 1);
19afd104
KL
1688 goto out;
1689 } else
1690 pr_warning("print interval < 100ms. "
1691 "The overhead percentage could be high in some cases. "
1692 "Please proceed with caution.\n");
13370a9b 1693 }
c45c6ea2 1694
d134ffb9 1695 if (perf_evlist__alloc_stats(evsel_list, interval))
03ad9747 1696 goto out;
d6d901c2 1697
86ee6e18 1698 if (perf_stat_init_aggr_mode())
03ad9747 1699 goto out;
86ee6e18 1700
58d7e993
IM
1701 /*
1702 * We dont want to block the signals - that would cause
1703 * child tasks to inherit that and Ctrl-C would not work.
1704 * What we want is for Ctrl-C to work in the exec()-ed
1705 * task, but being ignored by perf stat itself:
1706 */
f7b7c26e 1707 atexit(sig_atexit);
a7e191c3
FD
1708 if (!forever)
1709 signal(SIGINT, skip_signal);
13370a9b 1710 signal(SIGCHLD, skip_signal);
58d7e993
IM
1711 signal(SIGALRM, skip_signal);
1712 signal(SIGABRT, skip_signal);
1713
42202dd5 1714 status = 0;
a7e191c3 1715 for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
42202dd5 1716 if (run_count != 1 && verbose)
4aa9015f
SE
1717 fprintf(output, "[ perf stat: executing run #%d ... ]\n",
1718 run_idx + 1);
f9cef0a9 1719
42202dd5 1720 status = run_perf_stat(argc, argv);
a7e191c3 1721 if (forever && status != -1) {
d4f63a47 1722 print_counters(NULL, argc, argv);
254ecbc7 1723 perf_stat__reset_stats();
a7e191c3 1724 }
42202dd5
IM
1725 }
1726
a7e191c3 1727 if (!forever && status != -1 && !interval)
d4f63a47 1728 print_counters(NULL, argc, argv);
d134ffb9 1729
4979d0c7
JO
1730 if (STAT_RECORD) {
1731 /*
1732 * We synthesize the kernel mmap record just so that older tools
1733 * don't emit warnings about not being able to resolve symbols
1734 * due to /proc/sys/kernel/kptr_restrict settings and instead provide
1735 * a saner message about no samples being in the perf.data file.
1736 *
1737 * This also serves to suppress a warning about f_header.data.size == 0
8b99b1a4
JO
1738 * in header.c at the moment 'perf stat record' gets introduced, which
1739 * is not really needed once we start adding the stat specific PERF_RECORD_
1740 * records, but the need to suppress the kptr_restrict messages in older
1741 * tools remains -acme
4979d0c7
JO
1742 */
1743 int fd = perf_data_file__fd(&perf_stat.file);
1744 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
1745 process_synthesized_event,
1746 &perf_stat.session->machines.host);
1747 if (err) {
1748 pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
1749 "older tools may produce warnings about this file\n.");
1750 }
1751
7aad0c32
JO
1752 if (!interval) {
1753 if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
1754 pr_err("failed to write stat round event\n");
1755 }
1756
664c98d4
JO
1757 if (!perf_stat.file.is_pipe) {
1758 perf_stat.session->header.data_size += perf_stat.bytes_written;
1759 perf_session__write_header(perf_stat.session, evsel_list, fd, true);
1760 }
4979d0c7
JO
1761
1762 perf_session__delete(perf_stat.session);
1763 }
1764
544c2ae7 1765 perf_stat__exit_aggr_mode();
d134ffb9 1766 perf_evlist__free_stats(evsel_list);
0015e2e1
ACM
1767out:
1768 perf_evlist__delete(evsel_list);
42202dd5 1769 return status;
ddcacfa0 1770}