]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blame - tools/perf/builtin-stat.c
perf evlist: Use cpu_map__nr() helper
[mirror_ubuntu-focal-kernel.git] / tools / perf / builtin-stat.c
CommitLineData
/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
43
1a482f38 44#include "perf.h"
16f762a2 45#include "builtin.h"
148be2c1 46#include "util/util.h"
5242519b
IM
47#include "util/parse-options.h"
48#include "util/parse-events.h"
8f28827a 49#include "util/event.h"
361c99a6 50#include "util/evlist.h"
69aad6f1 51#include "util/evsel.h"
8f28827a 52#include "util/debug.h"
a5d243d0 53#include "util/color.h"
0007ecea 54#include "util/stat.h"
60666c63 55#include "util/header.h"
a12b51c4 56#include "util/cpumap.h"
d6d901c2 57#include "util/thread.h"
fd78260b 58#include "util/thread_map.h"
ddcacfa0 59
1f16c575 60#include <stdlib.h>
ddcacfa0 61#include <sys/prctl.h>
5af52b51 62#include <locale.h>
16c8a109 63
d7470b6a 64#define DEFAULT_SEPARATOR " "
2cee77c4
DA
65#define CNTR_NOT_SUPPORTED "<not supported>"
66#define CNTR_NOT_COUNTED "<not counted>"
d7470b6a 67
13370a9b
SE
68static void print_stat(int argc, const char **argv);
69static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
70static void print_counter(struct perf_evsel *counter, char *prefix);
d7e7a451 71static void print_aggr_socket(char *prefix);
13370a9b 72
666e6d48 73static struct perf_evlist *evsel_list;
361c99a6 74
77a6f014
NK
75static struct perf_target target = {
76 .uid = UINT_MAX,
77};
ddcacfa0 78
3d632595 79static int run_count = 1;
2e6cdf99 80static bool no_inherit = false;
c0555642 81static bool scale = true;
f5b4a9c3 82static bool no_aggr = false;
d7e7a451 83static bool aggr_socket = false;
933da83a 84static pid_t child_pid = -1;
c0555642 85static bool null_run = false;
2cba3ffb 86static int detailed_run = 0;
201e0b06 87static bool big_num = true;
d7470b6a 88static int big_num_opt = -1;
d7470b6a
SE
89static const char *csv_sep = NULL;
90static bool csv_output = false;
43bece79 91static bool group = false;
4aa9015f 92static FILE *output = NULL;
1f16c575
PZ
93static const char *pre_cmd = NULL;
94static const char *post_cmd = NULL;
95static bool sync_run = false;
13370a9b
SE
96static unsigned int interval = 0;
97static struct timespec ref_time;
d7e7a451 98static struct cpu_map *sock_map;
5af52b51 99
60666c63
LW
100static volatile int done = 0;
101
69aad6f1
ACM
102struct perf_stat {
103 struct stats res_stats[3];
69aad6f1
ACM
104};
105
13370a9b
SE
/*
 * Store a - b into r.
 *
 * @r: result; tv_nsec is kept in [0, 1e9) whenever both inputs are
 *     normalised, by borrowing one second if a->tv_nsec < b->tv_nsec.
 * @a: minuend (not modified).
 * @b: subtrahend (not modified).
 */
static inline void diff_timespec(struct timespec *r, const struct timespec *a,
				 const struct timespec *b)
{
	r->tv_sec = a->tv_sec - b->tv_sec;

	if (a->tv_nsec < b->tv_nsec) {
		/* Borrow one second to keep the nanosecond part non-negative. */
		r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
		r->tv_sec--;
	} else {
		r->tv_nsec = a->tv_nsec - b->tv_nsec;
	}
}
117
118static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
119{
120 return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
121}
122
123static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
124{
125 return perf_evsel__cpus(evsel)->nr;
126}
127
c52b12ed 128static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
69aad6f1 129{
c52b12ed 130 evsel->priv = zalloc(sizeof(struct perf_stat));
69aad6f1
ACM
131 return evsel->priv == NULL ? -ENOMEM : 0;
132}
133
134static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
135{
136 free(evsel->priv);
137 evsel->priv = NULL;
138}
139
13370a9b 140static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
7ae92e74 141{
13370a9b
SE
142 void *addr;
143 size_t sz;
144
145 sz = sizeof(*evsel->counts) +
146 (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
147
148 addr = zalloc(sz);
149 if (!addr)
150 return -ENOMEM;
151
152 evsel->prev_raw_counts = addr;
153
154 return 0;
7ae92e74
YZ
155}
156
13370a9b 157static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
7ae92e74 158{
13370a9b
SE
159 free(evsel->prev_raw_counts);
160 evsel->prev_raw_counts = NULL;
7ae92e74
YZ
161}
162
666e6d48
RR
163static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
164static struct stats runtime_cycles_stats[MAX_NR_CPUS];
165static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
166static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
167static struct stats runtime_branches_stats[MAX_NR_CPUS];
168static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
169static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
170static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
171static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
172static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
173static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
174static struct stats walltime_nsecs_stats;
be1ac0d8 175
cac21425 176static int create_perf_stat_counter(struct perf_evsel *evsel)
ddcacfa0 177{
69aad6f1 178 struct perf_event_attr *attr = &evsel->attr;
727ab04e 179
ddcacfa0 180 if (scale)
a21ca2ca
IM
181 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
182 PERF_FORMAT_TOTAL_TIME_RUNNING;
ddcacfa0 183
5d2cd909
ACM
184 attr->inherit = !no_inherit;
185
594ac61a
ACM
186 if (perf_target__has_cpu(&target))
187 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
5622c07b 188
07ac002f 189 if (!perf_target__has_task(&target) &&
823254ed 190 perf_evsel__is_group_leader(evsel)) {
48290609
ACM
191 attr->disabled = 1;
192 attr->enable_on_exec = 1;
ddcacfa0 193 }
084ab9f8 194
594ac61a 195 return perf_evsel__open_per_thread(evsel, evsel_list->threads);
ddcacfa0
IM
196}
197
c04f5e5d
IM
198/*
199 * Does the counter have nsecs as a unit?
200 */
daec78a0 201static inline int nsec_counter(struct perf_evsel *evsel)
c04f5e5d 202{
daec78a0
ACM
203 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
204 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
c04f5e5d
IM
205 return 1;
206
207 return 0;
208}
209
dcd9936a
IM
210/*
211 * Update various tracking values we maintain to print
212 * more semantic information such as miss/hit ratios,
213 * instruction rates, etc:
214 */
215static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
216{
217 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
218 update_stats(&runtime_nsecs_stats[0], count[0]);
219 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
220 update_stats(&runtime_cycles_stats[0], count[0]);
d3d1e86d
IM
221 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
222 update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
129c04cb 223 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
d3d1e86d 224 update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
dcd9936a
IM
225 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
226 update_stats(&runtime_branches_stats[0], count[0]);
227 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
228 update_stats(&runtime_cacherefs_stats[0], count[0]);
8bb6c79f
IM
229 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
230 update_stats(&runtime_l1_dcache_stats[0], count[0]);
c3305257
IM
231 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
232 update_stats(&runtime_l1_icache_stats[0], count[0]);
233 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
234 update_stats(&runtime_ll_cache_stats[0], count[0]);
235 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
236 update_stats(&runtime_dtlb_cache_stats[0], count[0]);
237 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
238 update_stats(&runtime_itlb_cache_stats[0], count[0]);
dcd9936a
IM
239}
240
c04f5e5d 241/*
2996f5dd 242 * Read out the results of a single counter:
f5b4a9c3 243 * aggregate counts across CPUs in system-wide mode
c04f5e5d 244 */
c52b12ed 245static int read_counter_aggr(struct perf_evsel *counter)
c04f5e5d 246{
69aad6f1 247 struct perf_stat *ps = counter->priv;
c52b12ed
ACM
248 u64 *count = counter->counts->aggr.values;
249 int i;
2996f5dd 250
7ae92e74 251 if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
7e2ed097 252 evsel_list->threads->nr, scale) < 0)
c52b12ed 253 return -1;
9e9772c4
PZ
254
255 for (i = 0; i < 3; i++)
69aad6f1 256 update_stats(&ps->res_stats[i], count[i]);
9e9772c4
PZ
257
258 if (verbose) {
4aa9015f 259 fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
7289f83c 260 perf_evsel__name(counter), count[0], count[1], count[2]);
9e9772c4
PZ
261 }
262
be1ac0d8
IM
263 /*
264 * Save the full runtime - to allow normalization during printout:
265 */
dcd9936a 266 update_shadow_stats(counter, count);
c52b12ed
ACM
267
268 return 0;
f5b4a9c3
SE
269}
270
271/*
272 * Read out the results of a single counter:
273 * do not aggregate counts across CPUs in system-wide mode
274 */
c52b12ed 275static int read_counter(struct perf_evsel *counter)
f5b4a9c3 276{
c52b12ed 277 u64 *count;
f5b4a9c3 278 int cpu;
f5b4a9c3 279
7ae92e74 280 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
c52b12ed
ACM
281 if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
282 return -1;
f5b4a9c3 283
c52b12ed 284 count = counter->counts->cpu[cpu].values;
f5b4a9c3 285
dcd9936a 286 update_shadow_stats(counter, count);
f5b4a9c3 287 }
c52b12ed
ACM
288
289 return 0;
2996f5dd
IM
290}
291
13370a9b
SE
292static void print_interval(void)
293{
294 static int num_print_interval;
295 struct perf_evsel *counter;
296 struct perf_stat *ps;
297 struct timespec ts, rs;
298 char prefix[64];
299
300 if (no_aggr) {
301 list_for_each_entry(counter, &evsel_list->entries, node) {
302 ps = counter->priv;
303 memset(ps->res_stats, 0, sizeof(ps->res_stats));
304 read_counter(counter);
305 }
306 } else {
307 list_for_each_entry(counter, &evsel_list->entries, node) {
308 ps = counter->priv;
309 memset(ps->res_stats, 0, sizeof(ps->res_stats));
310 read_counter_aggr(counter);
311 }
312 }
313 clock_gettime(CLOCK_MONOTONIC, &ts);
314 diff_timespec(&rs, &ts, &ref_time);
315 sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
316
317 if (num_print_interval == 0 && !csv_output) {
d7e7a451
SE
318 if (aggr_socket)
319 fprintf(output, "# time socket cpus counts events\n");
320 else if (no_aggr)
13370a9b
SE
321 fprintf(output, "# time CPU counts events\n");
322 else
323 fprintf(output, "# time counts events\n");
324 }
325
326 if (++num_print_interval == 25)
327 num_print_interval = 0;
328
d7e7a451
SE
329 if (aggr_socket)
330 print_aggr_socket(prefix);
331 else if (no_aggr) {
13370a9b
SE
332 list_for_each_entry(counter, &evsel_list->entries, node)
333 print_counter(counter, prefix);
334 } else {
335 list_for_each_entry(counter, &evsel_list->entries, node)
336 print_counter_aggr(counter, prefix);
337 }
338}
339
1f16c575 340static int __run_perf_stat(int argc __maybe_unused, const char **argv)
42202dd5 341{
56e52e85 342 char msg[512];
42202dd5 343 unsigned long long t0, t1;
cac21425 344 struct perf_evsel *counter;
13370a9b 345 struct timespec ts;
42202dd5 346 int status = 0;
051ae7f7 347 int child_ready_pipe[2], go_pipe[2];
6be2850e 348 const bool forks = (argc > 0);
051ae7f7 349 char buf;
42202dd5 350
13370a9b
SE
351 if (interval) {
352 ts.tv_sec = interval / 1000;
353 ts.tv_nsec = (interval % 1000) * 1000000;
354 } else {
355 ts.tv_sec = 1;
356 ts.tv_nsec = 0;
357 }
358
d7e7a451
SE
359 if (aggr_socket
360 && cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
361 perror("cannot build socket map");
362 return -1;
363 }
364
60666c63 365 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
051ae7f7 366 perror("failed to create pipes");
fceda7fe 367 return -1;
051ae7f7
PM
368 }
369
60666c63 370 if (forks) {
6be2850e 371 if ((child_pid = fork()) < 0)
60666c63
LW
372 perror("failed to fork");
373
6be2850e 374 if (!child_pid) {
60666c63
LW
375 close(child_ready_pipe[0]);
376 close(go_pipe[1]);
377 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
378
379 /*
380 * Do a dummy execvp to get the PLT entry resolved,
381 * so we avoid the resolver overhead on the real
382 * execvp call.
383 */
384 execvp("", (char **)argv);
385
386 /*
387 * Tell the parent we're ready to go
388 */
389 close(child_ready_pipe[1]);
390
391 /*
392 * Wait until the parent tells us to go.
393 */
394 if (read(go_pipe[0], &buf, 1) == -1)
395 perror("unable to read pipe");
396
397 execvp(argv[0], (char **)argv);
398
399 perror(argv[0]);
400 exit(-1);
401 }
051ae7f7 402
d67356e7 403 if (perf_target__none(&target))
7e2ed097 404 evsel_list->threads->map[0] = child_pid;
d6d901c2 405
051ae7f7 406 /*
60666c63 407 * Wait for the child to be ready to exec.
051ae7f7
PM
408 */
409 close(child_ready_pipe[1]);
60666c63
LW
410 close(go_pipe[0]);
411 if (read(child_ready_pipe[0], &buf, 1) == -1)
a92bef0f 412 perror("unable to read pipe");
60666c63 413 close(child_ready_pipe[0]);
051ae7f7
PM
414 }
415
6a4bb04c 416 if (group)
63dab225 417 perf_evlist__set_leader(evsel_list);
6a4bb04c 418
361c99a6 419 list_for_each_entry(counter, &evsel_list->entries, node) {
cac21425 420 if (create_perf_stat_counter(counter) < 0) {
979987a5
DA
421 /*
422 * PPC returns ENXIO for HW counters until 2.6.37
423 * (behavior changed with commit b0a873e).
424 */
38f6ae1e 425 if (errno == EINVAL || errno == ENOSYS ||
979987a5
DA
426 errno == ENOENT || errno == EOPNOTSUPP ||
427 errno == ENXIO) {
c63ca0c0
DA
428 if (verbose)
429 ui__warning("%s event is not supported by the kernel.\n",
7289f83c 430 perf_evsel__name(counter));
2cee77c4 431 counter->supported = false;
ede70290 432 continue;
c63ca0c0 433 }
ede70290 434
56e52e85
ACM
435 perf_evsel__open_strerror(counter, &target,
436 errno, msg, sizeof(msg));
437 ui__error("%s\n", msg);
438
48290609
ACM
439 if (child_pid != -1)
440 kill(child_pid, SIGTERM);
fceda7fe 441
48290609
ACM
442 return -1;
443 }
2cee77c4 444 counter->supported = true;
084ab9f8 445 }
42202dd5 446
1491a632 447 if (perf_evlist__apply_filters(evsel_list)) {
cfd748ae
FW
448 error("failed to set filter with %d (%s)\n", errno,
449 strerror(errno));
450 return -1;
451 }
452
42202dd5
IM
453 /*
454 * Enable counters and exec the command:
455 */
456 t0 = rdclock();
13370a9b 457 clock_gettime(CLOCK_MONOTONIC, &ref_time);
42202dd5 458
60666c63
LW
459 if (forks) {
460 close(go_pipe[1]);
13370a9b
SE
461 if (interval) {
462 while (!waitpid(child_pid, &status, WNOHANG)) {
463 nanosleep(&ts, NULL);
464 print_interval();
465 }
466 }
60666c63 467 wait(&status);
33e49ea7
AK
468 if (WIFSIGNALED(status))
469 psignal(WTERMSIG(status), argv[0]);
60666c63 470 } else {
13370a9b
SE
471 while (!done) {
472 nanosleep(&ts, NULL);
473 if (interval)
474 print_interval();
475 }
60666c63 476 }
42202dd5 477
42202dd5
IM
478 t1 = rdclock();
479
9e9772c4 480 update_stats(&walltime_nsecs_stats, t1 - t0);
42202dd5 481
f5b4a9c3 482 if (no_aggr) {
361c99a6 483 list_for_each_entry(counter, &evsel_list->entries, node) {
f5b4a9c3 484 read_counter(counter);
7ae92e74 485 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
c52b12ed 486 }
f5b4a9c3 487 } else {
361c99a6 488 list_for_each_entry(counter, &evsel_list->entries, node) {
f5b4a9c3 489 read_counter_aggr(counter);
7ae92e74 490 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
7e2ed097 491 evsel_list->threads->nr);
c52b12ed 492 }
f5b4a9c3 493 }
c52b12ed 494
42202dd5
IM
495 return WEXITSTATUS(status);
496}
497
1f16c575
PZ
498static int run_perf_stat(int argc __maybe_unused, const char **argv)
499{
500 int ret;
501
502 if (pre_cmd) {
503 ret = system(pre_cmd);
504 if (ret)
505 return ret;
506 }
507
508 if (sync_run)
509 sync();
510
511 ret = __run_perf_stat(argc, argv);
512 if (ret)
513 return ret;
514
515 if (post_cmd) {
516 ret = system(post_cmd);
517 if (ret)
518 return ret;
519 }
520
521 return ret;
522}
523
f99844cb
IM
524static void print_noise_pct(double total, double avg)
525{
0007ecea 526 double pct = rel_stddev_stats(total, avg);
f99844cb 527
3ae9a34d 528 if (csv_output)
4aa9015f 529 fprintf(output, "%s%.2f%%", csv_sep, pct);
a1bca6cc 530 else if (pct)
4aa9015f 531 fprintf(output, " ( +-%6.2f%% )", pct);
f99844cb
IM
532}
533
69aad6f1 534static void print_noise(struct perf_evsel *evsel, double avg)
42202dd5 535{
69aad6f1
ACM
536 struct perf_stat *ps;
537
849abde9
PZ
538 if (run_count == 1)
539 return;
540
69aad6f1 541 ps = evsel->priv;
f99844cb 542 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
42202dd5
IM
543}
544
d7e7a451 545static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
44175b6f 546{
506d4bc8 547 double msecs = avg / 1e6;
d7470b6a 548 char cpustr[16] = { '\0', };
2cba3ffb 549 const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";
44175b6f 550
d7e7a451
SE
551 if (aggr_socket)
552 sprintf(cpustr, "S%*d%s%*d%s",
553 csv_output ? 0 : -5,
554 cpu,
555 csv_sep,
556 csv_output ? 0 : 4,
557 nr,
558 csv_sep);
559 else if (no_aggr)
d7470b6a
SE
560 sprintf(cpustr, "CPU%*d%s",
561 csv_output ? 0 : -4,
7ae92e74 562 perf_evsel__cpus(evsel)->map[cpu], csv_sep);
d7470b6a 563
7289f83c 564 fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel));
d7470b6a 565
023695d9 566 if (evsel->cgrp)
4aa9015f 567 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
023695d9 568
13370a9b 569 if (csv_output || interval)
d7470b6a 570 return;
44175b6f 571
daec78a0 572 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
4aa9015f
SE
573 fprintf(output, " # %8.3f CPUs utilized ",
574 avg / avg_stats(&walltime_nsecs_stats));
9dac6a29
NK
575 else
576 fprintf(output, " ");
44175b6f
IM
577}
578
15e6392f
NK
/*
 * Row selector for the threshold table in get_ratio_color().
 * GRC_MAX_NR is the number of rows, not a valid selector.
 */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};
586
587static const char *get_ratio_color(enum grc_type type, double ratio)
588{
589 static const double grc_table[GRC_MAX_NR][3] = {
590 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
591 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
592 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
593 };
594 const char *color = PERF_COLOR_NORMAL;
595
596 if (ratio > grc_table[type][0])
597 color = PERF_COLOR_RED;
598 else if (ratio > grc_table[type][1])
599 color = PERF_COLOR_MAGENTA;
600 else if (ratio > grc_table[type][2])
601 color = PERF_COLOR_YELLOW;
602
603 return color;
604}
605
1d037ca1
IT
606static void print_stalled_cycles_frontend(int cpu,
607 struct perf_evsel *evsel
608 __maybe_unused, double avg)
d3d1e86d
IM
609{
610 double total, ratio = 0.0;
611 const char *color;
612
613 total = avg_stats(&runtime_cycles_stats[cpu]);
614
615 if (total)
616 ratio = avg / total * 100.0;
617
15e6392f 618 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
d3d1e86d 619
4aa9015f
SE
620 fprintf(output, " # ");
621 color_fprintf(output, color, "%6.2f%%", ratio);
622 fprintf(output, " frontend cycles idle ");
d3d1e86d
IM
623}
624
1d037ca1
IT
625static void print_stalled_cycles_backend(int cpu,
626 struct perf_evsel *evsel
627 __maybe_unused, double avg)
a5d243d0
IM
628{
629 double total, ratio = 0.0;
630 const char *color;
631
632 total = avg_stats(&runtime_cycles_stats[cpu]);
633
634 if (total)
635 ratio = avg / total * 100.0;
636
15e6392f 637 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
a5d243d0 638
4aa9015f
SE
639 fprintf(output, " # ");
640 color_fprintf(output, color, "%6.2f%%", ratio);
641 fprintf(output, " backend cycles idle ");
a5d243d0
IM
642}
643
1d037ca1
IT
644static void print_branch_misses(int cpu,
645 struct perf_evsel *evsel __maybe_unused,
646 double avg)
c78df6c1
IM
647{
648 double total, ratio = 0.0;
649 const char *color;
650
651 total = avg_stats(&runtime_branches_stats[cpu]);
652
653 if (total)
654 ratio = avg / total * 100.0;
655
15e6392f 656 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
c78df6c1 657
4aa9015f
SE
658 fprintf(output, " # ");
659 color_fprintf(output, color, "%6.2f%%", ratio);
660 fprintf(output, " of all branches ");
c78df6c1
IM
661}
662
1d037ca1
IT
663static void print_l1_dcache_misses(int cpu,
664 struct perf_evsel *evsel __maybe_unused,
665 double avg)
8bb6c79f
IM
666{
667 double total, ratio = 0.0;
668 const char *color;
669
670 total = avg_stats(&runtime_l1_dcache_stats[cpu]);
671
672 if (total)
673 ratio = avg / total * 100.0;
674
15e6392f 675 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
8bb6c79f 676
4aa9015f
SE
677 fprintf(output, " # ");
678 color_fprintf(output, color, "%6.2f%%", ratio);
679 fprintf(output, " of all L1-dcache hits ");
8bb6c79f
IM
680}
681
1d037ca1
IT
682static void print_l1_icache_misses(int cpu,
683 struct perf_evsel *evsel __maybe_unused,
684 double avg)
c3305257
IM
685{
686 double total, ratio = 0.0;
687 const char *color;
688
689 total = avg_stats(&runtime_l1_icache_stats[cpu]);
690
691 if (total)
692 ratio = avg / total * 100.0;
693
15e6392f 694 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
c3305257 695
4aa9015f
SE
696 fprintf(output, " # ");
697 color_fprintf(output, color, "%6.2f%%", ratio);
698 fprintf(output, " of all L1-icache hits ");
c3305257
IM
699}
700
1d037ca1
IT
701static void print_dtlb_cache_misses(int cpu,
702 struct perf_evsel *evsel __maybe_unused,
703 double avg)
c3305257
IM
704{
705 double total, ratio = 0.0;
706 const char *color;
707
708 total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
709
710 if (total)
711 ratio = avg / total * 100.0;
712
15e6392f 713 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
c3305257 714
4aa9015f
SE
715 fprintf(output, " # ");
716 color_fprintf(output, color, "%6.2f%%", ratio);
717 fprintf(output, " of all dTLB cache hits ");
c3305257
IM
718}
719
1d037ca1
IT
720static void print_itlb_cache_misses(int cpu,
721 struct perf_evsel *evsel __maybe_unused,
722 double avg)
c3305257
IM
723{
724 double total, ratio = 0.0;
725 const char *color;
726
727 total = avg_stats(&runtime_itlb_cache_stats[cpu]);
728
729 if (total)
730 ratio = avg / total * 100.0;
731
15e6392f 732 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
c3305257 733
4aa9015f
SE
734 fprintf(output, " # ");
735 color_fprintf(output, color, "%6.2f%%", ratio);
736 fprintf(output, " of all iTLB cache hits ");
c3305257
IM
737}
738
1d037ca1
IT
739static void print_ll_cache_misses(int cpu,
740 struct perf_evsel *evsel __maybe_unused,
741 double avg)
c3305257
IM
742{
743 double total, ratio = 0.0;
744 const char *color;
745
746 total = avg_stats(&runtime_ll_cache_stats[cpu]);
747
748 if (total)
749 ratio = avg / total * 100.0;
750
15e6392f 751 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
c3305257 752
4aa9015f
SE
753 fprintf(output, " # ");
754 color_fprintf(output, color, "%6.2f%%", ratio);
755 fprintf(output, " of all LL-cache hits ");
c3305257
IM
756}
757
d7e7a451 758static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
44175b6f 759{
c7f7fea3 760 double total, ratio = 0.0;
f5b4a9c3 761 char cpustr[16] = { '\0', };
d7470b6a
SE
762 const char *fmt;
763
764 if (csv_output)
765 fmt = "%s%.0f%s%s";
766 else if (big_num)
2cba3ffb 767 fmt = "%s%'18.0f%s%-25s";
d7470b6a 768 else
2cba3ffb 769 fmt = "%s%18.0f%s%-25s";
f5b4a9c3 770
d7e7a451
SE
771 if (aggr_socket)
772 sprintf(cpustr, "S%*d%s%*d%s",
773 csv_output ? 0 : -5,
774 cpu,
775 csv_sep,
776 csv_output ? 0 : 4,
777 nr,
778 csv_sep);
779 else if (no_aggr)
d7470b6a
SE
780 sprintf(cpustr, "CPU%*d%s",
781 csv_output ? 0 : -4,
7ae92e74 782 perf_evsel__cpus(evsel)->map[cpu], csv_sep);
f5b4a9c3
SE
783 else
784 cpu = 0;
c7f7fea3 785
7289f83c 786 fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel));
d7470b6a 787
023695d9 788 if (evsel->cgrp)
4aa9015f 789 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
023695d9 790
13370a9b 791 if (csv_output || interval)
d7470b6a 792 return;
44175b6f 793
daec78a0 794 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
f5b4a9c3 795 total = avg_stats(&runtime_cycles_stats[cpu]);
c7f7fea3
IM
796 if (total)
797 ratio = avg / total;
798
4aa9015f 799 fprintf(output, " # %5.2f insns per cycle ", ratio);
481f988a 800
d3d1e86d
IM
801 total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
802 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
481f988a
IM
803
804 if (total && avg) {
805 ratio = total / avg;
4aa9015f 806 fprintf(output, "\n # %5.2f stalled cycles per insn", ratio);
481f988a
IM
807 }
808
daec78a0 809 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
f5b4a9c3 810 runtime_branches_stats[cpu].n != 0) {
c78df6c1 811 print_branch_misses(cpu, evsel, avg);
8bb6c79f
IM
812 } else if (
813 evsel->attr.type == PERF_TYPE_HW_CACHE &&
814 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
815 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
816 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
c6264def 817 runtime_l1_dcache_stats[cpu].n != 0) {
8bb6c79f 818 print_l1_dcache_misses(cpu, evsel, avg);
c3305257
IM
819 } else if (
820 evsel->attr.type == PERF_TYPE_HW_CACHE &&
821 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
822 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
823 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
824 runtime_l1_icache_stats[cpu].n != 0) {
825 print_l1_icache_misses(cpu, evsel, avg);
826 } else if (
827 evsel->attr.type == PERF_TYPE_HW_CACHE &&
828 evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
829 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
830 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
831 runtime_dtlb_cache_stats[cpu].n != 0) {
832 print_dtlb_cache_misses(cpu, evsel, avg);
833 } else if (
834 evsel->attr.type == PERF_TYPE_HW_CACHE &&
835 evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
836 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
837 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
838 runtime_itlb_cache_stats[cpu].n != 0) {
839 print_itlb_cache_misses(cpu, evsel, avg);
840 } else if (
841 evsel->attr.type == PERF_TYPE_HW_CACHE &&
842 evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
843 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
844 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
845 runtime_ll_cache_stats[cpu].n != 0) {
846 print_ll_cache_misses(cpu, evsel, avg);
d58f4c82
IM
847 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
848 runtime_cacherefs_stats[cpu].n != 0) {
849 total = avg_stats(&runtime_cacherefs_stats[cpu]);
850
851 if (total)
852 ratio = avg * 100 / total;
853
4aa9015f 854 fprintf(output, " # %8.3f %% of all cache refs ", ratio);
d58f4c82 855
d3d1e86d
IM
856 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
857 print_stalled_cycles_frontend(cpu, evsel, avg);
129c04cb 858 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
d3d1e86d 859 print_stalled_cycles_backend(cpu, evsel, avg);
481f988a 860 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
f5b4a9c3 861 total = avg_stats(&runtime_nsecs_stats[cpu]);
c7f7fea3
IM
862
863 if (total)
481f988a 864 ratio = 1.0 * avg / total;
c7f7fea3 865
4aa9015f 866 fprintf(output, " # %8.3f GHz ", ratio);
481f988a 867 } else if (runtime_nsecs_stats[cpu].n != 0) {
5fde2523
NK
868 char unit = 'M';
869
481f988a 870 total = avg_stats(&runtime_nsecs_stats[cpu]);
11ba2b85
IM
871
872 if (total)
481f988a 873 ratio = 1000.0 * avg / total;
5fde2523
NK
874 if (ratio < 0.001) {
875 ratio *= 1000;
876 unit = 'K';
877 }
11ba2b85 878
5fde2523 879 fprintf(output, " # %8.3f %c/sec ", ratio, unit);
a5d243d0 880 } else {
4aa9015f 881 fprintf(output, " ");
44175b6f 882 }
44175b6f
IM
883}
884
d7e7a451
SE
885static void print_aggr_socket(char *prefix)
886{
887 struct perf_evsel *counter;
888 u64 ena, run, val;
889 int cpu, s, s2, sock, nr;
890
891 if (!sock_map)
892 return;
893
894 for (s = 0; s < sock_map->nr; s++) {
895 sock = cpu_map__socket(sock_map, s);
896 list_for_each_entry(counter, &evsel_list->entries, node) {
897 val = ena = run = 0;
898 nr = 0;
899 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
900 s2 = cpu_map__get_socket(evsel_list->cpus, cpu);
901 if (s2 != sock)
902 continue;
903 val += counter->counts->cpu[cpu].val;
904 ena += counter->counts->cpu[cpu].ena;
905 run += counter->counts->cpu[cpu].run;
906 nr++;
907 }
908 if (prefix)
909 fprintf(output, "%s", prefix);
910
911 if (run == 0 || ena == 0) {
912 fprintf(output, "S%*d%s%*d%s%*s%s%*s",
913 csv_output ? 0 : -5,
914 s,
915 csv_sep,
916 csv_output ? 0 : 4,
917 nr,
918 csv_sep,
919 csv_output ? 0 : 18,
920 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
921 csv_sep,
922 csv_output ? 0 : -24,
923 perf_evsel__name(counter));
924 if (counter->cgrp)
925 fprintf(output, "%s%s",
926 csv_sep, counter->cgrp->name);
927
928 fputc('\n', output);
929 continue;
930 }
931
932 if (nsec_counter(counter))
933 nsec_printout(sock, nr, counter, val);
934 else
935 abs_printout(sock, nr, counter, val);
936
937 if (!csv_output) {
938 print_noise(counter, 1.0);
939
940 if (run != ena)
941 fprintf(output, " (%.2f%%)",
942 100.0 * run / ena);
943 }
944 fputc('\n', output);
945 }
946 }
947}
948
2996f5dd
IM
949/*
950 * Print out the results of a single counter:
f5b4a9c3 951 * aggregated counts in system-wide mode
2996f5dd 952 */
13370a9b 953static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
2996f5dd 954{
69aad6f1
ACM
955 struct perf_stat *ps = counter->priv;
956 double avg = avg_stats(&ps->res_stats[0]);
c52b12ed 957 int scaled = counter->counts->scaled;
2996f5dd 958
13370a9b
SE
959 if (prefix)
960 fprintf(output, "%s", prefix);
961
2996f5dd 962 if (scaled == -1) {
4aa9015f 963 fprintf(output, "%*s%s%*s",
d7470b6a 964 csv_output ? 0 : 18,
2cee77c4 965 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
023695d9
SE
966 csv_sep,
967 csv_output ? 0 : -24,
7289f83c 968 perf_evsel__name(counter));
023695d9
SE
969
970 if (counter->cgrp)
4aa9015f 971 fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
023695d9 972
4aa9015f 973 fputc('\n', output);
2996f5dd
IM
974 return;
975 }
c04f5e5d 976
44175b6f 977 if (nsec_counter(counter))
d7e7a451 978 nsec_printout(-1, 0, counter, avg);
44175b6f 979 else
d7e7a451 980 abs_printout(-1, 0, counter, avg);
849abde9 981
3ae9a34d
ZH
982 print_noise(counter, avg);
983
d7470b6a 984 if (csv_output) {
4aa9015f 985 fputc('\n', output);
d7470b6a
SE
986 return;
987 }
988
506d4bc8
PZ
989 if (scaled) {
990 double avg_enabled, avg_running;
991
69aad6f1
ACM
992 avg_enabled = avg_stats(&ps->res_stats[1]);
993 avg_running = avg_stats(&ps->res_stats[2]);
d7c29318 994
4aa9015f 995 fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
506d4bc8 996 }
4aa9015f 997 fprintf(output, "\n");
c04f5e5d
IM
998}
999
f5b4a9c3
SE
1000/*
1001 * Print out the results of a single counter:
1002 * does not use aggregated count in system-wide
1003 */
13370a9b 1004static void print_counter(struct perf_evsel *counter, char *prefix)
f5b4a9c3
SE
1005{
1006 u64 ena, run, val;
1007 int cpu;
1008
7ae92e74 1009 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
c52b12ed
ACM
1010 val = counter->counts->cpu[cpu].val;
1011 ena = counter->counts->cpu[cpu].ena;
1012 run = counter->counts->cpu[cpu].run;
13370a9b
SE
1013
1014 if (prefix)
1015 fprintf(output, "%s", prefix);
1016
f5b4a9c3 1017 if (run == 0 || ena == 0) {
4aa9015f 1018 fprintf(output, "CPU%*d%s%*s%s%*s",
d7470b6a 1019 csv_output ? 0 : -4,
7ae92e74 1020 perf_evsel__cpus(counter)->map[cpu], csv_sep,
d7470b6a 1021 csv_output ? 0 : 18,
2cee77c4
DA
1022 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1023 csv_sep,
023695d9 1024 csv_output ? 0 : -24,
7289f83c 1025 perf_evsel__name(counter));
f5b4a9c3 1026
023695d9 1027 if (counter->cgrp)
4aa9015f
SE
1028 fprintf(output, "%s%s",
1029 csv_sep, counter->cgrp->name);
023695d9 1030
4aa9015f 1031 fputc('\n', output);
f5b4a9c3
SE
1032 continue;
1033 }
1034
1035 if (nsec_counter(counter))
d7e7a451 1036 nsec_printout(cpu, 0, counter, val);
f5b4a9c3 1037 else
d7e7a451 1038 abs_printout(cpu, 0, counter, val);
f5b4a9c3 1039
d7470b6a
SE
1040 if (!csv_output) {
1041 print_noise(counter, 1.0);
f5b4a9c3 1042
c6264def 1043 if (run != ena)
4aa9015f
SE
1044 fprintf(output, " (%.2f%%)",
1045 100.0 * run / ena);
f5b4a9c3 1046 }
4aa9015f 1047 fputc('\n', output);
f5b4a9c3
SE
1048 }
1049}
1050
42202dd5
IM
1051static void print_stat(int argc, const char **argv)
1052{
69aad6f1
ACM
1053 struct perf_evsel *counter;
1054 int i;
42202dd5 1055
ddcacfa0
IM
1056 fflush(stdout);
1057
d7470b6a 1058 if (!csv_output) {
4aa9015f
SE
1059 fprintf(output, "\n");
1060 fprintf(output, " Performance counter stats for ");
aa22dd49 1061 if (!perf_target__has_task(&target)) {
4aa9015f 1062 fprintf(output, "\'%s", argv[0]);
d7470b6a 1063 for (i = 1; i < argc; i++)
4aa9015f 1064 fprintf(output, " %s", argv[i]);
20f946b4
NK
1065 } else if (target.pid)
1066 fprintf(output, "process id \'%s", target.pid);
d7470b6a 1067 else
20f946b4 1068 fprintf(output, "thread id \'%s", target.tid);
44db76c8 1069
4aa9015f 1070 fprintf(output, "\'");
d7470b6a 1071 if (run_count > 1)
4aa9015f
SE
1072 fprintf(output, " (%d runs)", run_count);
1073 fprintf(output, ":\n\n");
d7470b6a 1074 }
2996f5dd 1075
d7e7a451
SE
1076 if (aggr_socket)
1077 print_aggr_socket(NULL);
1078 else if (no_aggr) {
361c99a6 1079 list_for_each_entry(counter, &evsel_list->entries, node)
13370a9b 1080 print_counter(counter, NULL);
f5b4a9c3 1081 } else {
361c99a6 1082 list_for_each_entry(counter, &evsel_list->entries, node)
13370a9b 1083 print_counter_aggr(counter, NULL);
f5b4a9c3 1084 }
ddcacfa0 1085
d7470b6a 1086 if (!csv_output) {
c3305257 1087 if (!null_run)
4aa9015f
SE
1088 fprintf(output, "\n");
1089 fprintf(output, " %17.9f seconds time elapsed",
d7470b6a
SE
1090 avg_stats(&walltime_nsecs_stats)/1e9);
1091 if (run_count > 1) {
4aa9015f 1092 fprintf(output, " ");
f99844cb
IM
1093 print_noise_pct(stddev_stats(&walltime_nsecs_stats),
1094 avg_stats(&walltime_nsecs_stats));
d7470b6a 1095 }
4aa9015f 1096 fprintf(output, "\n\n");
566747e6 1097 }
ddcacfa0
IM
1098}
1099
f7b7c26e
PZ
1100static volatile int signr = -1;
1101
5242519b 1102static void skip_signal(int signo)
ddcacfa0 1103{
13370a9b 1104 if ((child_pid == -1) || interval)
60666c63
LW
1105 done = 1;
1106
f7b7c26e
PZ
1107 signr = signo;
1108}
1109
1110static void sig_atexit(void)
1111{
933da83a
CW
1112 if (child_pid != -1)
1113 kill(child_pid, SIGTERM);
1114
f7b7c26e
PZ
1115 if (signr == -1)
1116 return;
1117
1118 signal(signr, SIG_DFL);
1119 kill(getpid(), signr);
5242519b
IM
1120}
1121
1d037ca1
IT
1122static int stat__set_big_num(const struct option *opt __maybe_unused,
1123 const char *s __maybe_unused, int unset)
d7470b6a
SE
1124{
1125 big_num_opt = unset ? 0 : 1;
1126 return 0;
1127}
1128
2cba3ffb
IM
1129/*
1130 * Add default attributes, if there were no attributes specified or
1131 * if -d/--detailed, -d -d or -d -d -d is used:
1132 */
1133static int add_default_attributes(void)
1134{
b070a547
ACM
1135 struct perf_event_attr default_attrs[] = {
1136
1137 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
1138 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
1139 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
1140 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
1141
1142 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
1143 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
1144 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
1145 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
1146 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
1147 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
1148
1149};
1150
1151/*
1152 * Detailed stats (-d), covering the L1 and last level data caches:
1153 */
1154 struct perf_event_attr detailed_attrs[] = {
1155
1156 { .type = PERF_TYPE_HW_CACHE,
1157 .config =
1158 PERF_COUNT_HW_CACHE_L1D << 0 |
1159 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1160 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
1161
1162 { .type = PERF_TYPE_HW_CACHE,
1163 .config =
1164 PERF_COUNT_HW_CACHE_L1D << 0 |
1165 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1166 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
1167
1168 { .type = PERF_TYPE_HW_CACHE,
1169 .config =
1170 PERF_COUNT_HW_CACHE_LL << 0 |
1171 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1172 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
1173
1174 { .type = PERF_TYPE_HW_CACHE,
1175 .config =
1176 PERF_COUNT_HW_CACHE_LL << 0 |
1177 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1178 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
1179};
1180
1181/*
1182 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
1183 */
1184 struct perf_event_attr very_detailed_attrs[] = {
1185
1186 { .type = PERF_TYPE_HW_CACHE,
1187 .config =
1188 PERF_COUNT_HW_CACHE_L1I << 0 |
1189 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1190 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
1191
1192 { .type = PERF_TYPE_HW_CACHE,
1193 .config =
1194 PERF_COUNT_HW_CACHE_L1I << 0 |
1195 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1196 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
1197
1198 { .type = PERF_TYPE_HW_CACHE,
1199 .config =
1200 PERF_COUNT_HW_CACHE_DTLB << 0 |
1201 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1202 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
1203
1204 { .type = PERF_TYPE_HW_CACHE,
1205 .config =
1206 PERF_COUNT_HW_CACHE_DTLB << 0 |
1207 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1208 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
1209
1210 { .type = PERF_TYPE_HW_CACHE,
1211 .config =
1212 PERF_COUNT_HW_CACHE_ITLB << 0 |
1213 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1214 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
1215
1216 { .type = PERF_TYPE_HW_CACHE,
1217 .config =
1218 PERF_COUNT_HW_CACHE_ITLB << 0 |
1219 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
1220 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
1221
1222};
1223
1224/*
1225 * Very, very detailed stats (-d -d -d), adding prefetch events:
1226 */
1227 struct perf_event_attr very_very_detailed_attrs[] = {
1228
1229 { .type = PERF_TYPE_HW_CACHE,
1230 .config =
1231 PERF_COUNT_HW_CACHE_L1D << 0 |
1232 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
1233 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
1234
1235 { .type = PERF_TYPE_HW_CACHE,
1236 .config =
1237 PERF_COUNT_HW_CACHE_L1D << 0 |
1238 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
1239 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
1240};
1241
2cba3ffb
IM
1242 /* Set attrs if no event is selected and !null_run: */
1243 if (null_run)
1244 return 0;
1245
1246 if (!evsel_list->nr_entries) {
79695e1b 1247 if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
50d08e47 1248 return -1;
2cba3ffb
IM
1249 }
1250
1251 /* Detailed events get appended to the event list: */
1252
1253 if (detailed_run < 1)
1254 return 0;
1255
1256 /* Append detailed run extra attributes: */
79695e1b 1257 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
50d08e47 1258 return -1;
2cba3ffb
IM
1259
1260 if (detailed_run < 2)
1261 return 0;
1262
1263 /* Append very detailed run extra attributes: */
79695e1b 1264 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
50d08e47 1265 return -1;
2cba3ffb
IM
1266
1267 if (detailed_run < 3)
1268 return 0;
1269
1270 /* Append very, very detailed run extra attributes: */
79695e1b 1271 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
2cba3ffb
IM
1272}
1273
1d037ca1 1274int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
5242519b 1275{
1f16c575 1276 bool append_file = false;
b070a547
ACM
1277 int output_fd = 0;
1278 const char *output_name = NULL;
1279 const struct option options[] = {
1280 OPT_CALLBACK('e', "event", &evsel_list, "event",
1281 "event selector. use 'perf list' to list available events",
1282 parse_events_option),
1283 OPT_CALLBACK(0, "filter", &evsel_list, "filter",
1284 "event filter", parse_filter),
1285 OPT_BOOLEAN('i', "no-inherit", &no_inherit,
1286 "child tasks do not inherit counters"),
1287 OPT_STRING('p', "pid", &target.pid, "pid",
1288 "stat events on existing process id"),
1289 OPT_STRING('t', "tid", &target.tid, "tid",
1290 "stat events on existing thread id"),
1291 OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
1292 "system-wide collection from all CPUs"),
1293 OPT_BOOLEAN('g', "group", &group,
1294 "put the counters into a counter group"),
1295 OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
1296 OPT_INCR('v', "verbose", &verbose,
1297 "be more verbose (show counter open errors, etc)"),
1298 OPT_INTEGER('r', "repeat", &run_count,
1299 "repeat command and print average + stddev (max: 100)"),
1300 OPT_BOOLEAN('n', "null", &null_run,
1301 "null run - dont start any counters"),
1302 OPT_INCR('d', "detailed", &detailed_run,
1303 "detailed run - start a lot of events"),
1304 OPT_BOOLEAN('S', "sync", &sync_run,
1305 "call sync() before starting a run"),
1306 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
1307 "print large numbers with thousands\' separators",
1308 stat__set_big_num),
1309 OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1310 "list of cpus to monitor in system-wide"),
1311 OPT_BOOLEAN('A', "no-aggr", &no_aggr, "disable CPU count aggregation"),
1312 OPT_STRING('x', "field-separator", &csv_sep, "separator",
1313 "print counts with custom separator"),
1314 OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
1315 "monitor event in cgroup name only", parse_cgroups),
1316 OPT_STRING('o', "output", &output_name, "file", "output file name"),
1317 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
1318 OPT_INTEGER(0, "log-fd", &output_fd,
1319 "log output to fd, instead of stderr"),
1f16c575
PZ
1320 OPT_STRING(0, "pre", &pre_cmd, "command",
1321 "command to run prior to the measured command"),
1322 OPT_STRING(0, "post", &post_cmd, "command",
1323 "command to run after to the measured command"),
13370a9b
SE
1324 OPT_UINTEGER('I', "interval-print", &interval,
1325 "print counts at regular interval in ms (>= 100)"),
d7e7a451 1326 OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"),
b070a547
ACM
1327 OPT_END()
1328 };
1329 const char * const stat_usage[] = {
1330 "perf stat [<options>] [<command>]",
1331 NULL
1332 };
69aad6f1 1333 struct perf_evsel *pos;
b070a547 1334 int status = -ENOMEM, run_idx;
4aa9015f 1335 const char *mode;
42202dd5 1336
5af52b51
SE
1337 setlocale(LC_ALL, "");
1338
334fe7a3 1339 evsel_list = perf_evlist__new();
361c99a6
ACM
1340 if (evsel_list == NULL)
1341 return -ENOMEM;
1342
a0541234
AB
1343 argc = parse_options(argc, argv, options, stat_usage,
1344 PARSE_OPT_STOP_AT_NON_OPTION);
d7470b6a 1345
4aa9015f
SE
1346 output = stderr;
1347 if (output_name && strcmp(output_name, "-"))
1348 output = NULL;
1349
56f3bae7
JC
1350 if (output_name && output_fd) {
1351 fprintf(stderr, "cannot use both --output and --log-fd\n");
1352 usage_with_options(stat_usage, options);
1353 }
fc3e4d07
SE
1354
1355 if (output_fd < 0) {
1356 fprintf(stderr, "argument to --log-fd must be a > 0\n");
1357 usage_with_options(stat_usage, options);
1358 }
1359
4aa9015f
SE
1360 if (!output) {
1361 struct timespec tm;
1362 mode = append_file ? "a" : "w";
1363
1364 output = fopen(output_name, mode);
1365 if (!output) {
1366 perror("failed to create output file");
fceda7fe 1367 return -1;
4aa9015f
SE
1368 }
1369 clock_gettime(CLOCK_REALTIME, &tm);
1370 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
fc3e4d07 1371 } else if (output_fd > 0) {
56f3bae7
JC
1372 mode = append_file ? "a" : "w";
1373 output = fdopen(output_fd, mode);
1374 if (!output) {
1375 perror("Failed opening logfd");
1376 return -errno;
1377 }
4aa9015f
SE
1378 }
1379
d4ffd04d 1380 if (csv_sep) {
d7470b6a 1381 csv_output = true;
d4ffd04d
JC
1382 if (!strcmp(csv_sep, "\\t"))
1383 csv_sep = "\t";
1384 } else
d7470b6a
SE
1385 csv_sep = DEFAULT_SEPARATOR;
1386
1387 /*
1388 * let the spreadsheet do the pretty-printing
1389 */
1390 if (csv_output) {
61a9f324 1391 /* User explicitly passed -B? */
d7470b6a
SE
1392 if (big_num_opt == 1) {
1393 fprintf(stderr, "-B option not supported with -x\n");
1394 usage_with_options(stat_usage, options);
1395 } else /* Nope, so disable big number formatting */
1396 big_num = false;
1397 } else if (big_num_opt == 0) /* User passed --no-big-num */
1398 big_num = false;
1399
aa22dd49 1400 if (!argc && !perf_target__has_task(&target))
5242519b 1401 usage_with_options(stat_usage, options);
9e9772c4 1402 if (run_count <= 0)
42202dd5 1403 usage_with_options(stat_usage, options);
ddcacfa0 1404
023695d9 1405 /* no_aggr, cgroup are for system-wide only */
aa22dd49 1406 if ((no_aggr || nr_cgroups) && !perf_target__has_cpu(&target)) {
023695d9
SE
1407 fprintf(stderr, "both cgroup and no-aggregation "
1408 "modes only available in system-wide mode\n");
1409
f5b4a9c3 1410 usage_with_options(stat_usage, options);
023695d9 1411 }
f5b4a9c3 1412
d7e7a451
SE
1413 if (aggr_socket) {
1414 if (!perf_target__has_cpu(&target)) {
1415 fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
1416 usage_with_options(stat_usage, options);
1417 }
1418 no_aggr = true;
1419 }
1420
2cba3ffb
IM
1421 if (add_default_attributes())
1422 goto out;
ddcacfa0 1423
4bd0f2d2 1424 perf_target__validate(&target);
5c98d466 1425
77a6f014 1426 if (perf_evlist__create_maps(evsel_list, &target) < 0) {
aa22dd49 1427 if (perf_target__has_task(&target))
77a6f014 1428 pr_err("Problems finding threads of monitor\n");
aa22dd49 1429 if (perf_target__has_cpu(&target))
77a6f014 1430 perror("failed to parse CPUs map");
ddcacfa0 1431
c45c6ea2 1432 usage_with_options(stat_usage, options);
60d567e2
ACM
1433 return -1;
1434 }
13370a9b
SE
1435 if (interval && interval < 100) {
1436 pr_err("print interval must be >= 100ms\n");
1437 usage_with_options(stat_usage, options);
1438 return -1;
1439 }
c45c6ea2 1440
361c99a6 1441 list_for_each_entry(pos, &evsel_list->entries, node) {
c52b12ed 1442 if (perf_evsel__alloc_stat_priv(pos) < 0 ||
7ae92e74 1443 perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0)
69aad6f1 1444 goto out_free_fd;
d6d901c2 1445 }
13370a9b
SE
1446 if (interval) {
1447 list_for_each_entry(pos, &evsel_list->entries, node) {
1448 if (perf_evsel__alloc_prev_raw_counts(pos) < 0)
1449 goto out_free_fd;
1450 }
1451 }
d6d901c2 1452
58d7e993
IM
1453 /*
1454 * We dont want to block the signals - that would cause
1455 * child tasks to inherit that and Ctrl-C would not work.
1456 * What we want is for Ctrl-C to work in the exec()-ed
1457 * task, but being ignored by perf stat itself:
1458 */
f7b7c26e 1459 atexit(sig_atexit);
58d7e993 1460 signal(SIGINT, skip_signal);
13370a9b 1461 signal(SIGCHLD, skip_signal);
58d7e993
IM
1462 signal(SIGALRM, skip_signal);
1463 signal(SIGABRT, skip_signal);
1464
42202dd5
IM
1465 status = 0;
1466 for (run_idx = 0; run_idx < run_count; run_idx++) {
1467 if (run_count != 1 && verbose)
4aa9015f
SE
1468 fprintf(output, "[ perf stat: executing run #%d ... ]\n",
1469 run_idx + 1);
f9cef0a9 1470
42202dd5
IM
1471 status = run_perf_stat(argc, argv);
1472 }
1473
13370a9b 1474 if (status != -1 && !interval)
084ab9f8 1475 print_stat(argc, argv);
69aad6f1 1476out_free_fd:
13370a9b 1477 list_for_each_entry(pos, &evsel_list->entries, node) {
69aad6f1 1478 perf_evsel__free_stat_priv(pos);
43f8e76e 1479 perf_evsel__free_counts(pos);
13370a9b
SE
1480 perf_evsel__free_prev_raw_counts(pos);
1481 }
7e2ed097 1482 perf_evlist__delete_maps(evsel_list);
0015e2e1
ACM
1483out:
1484 perf_evlist__delete(evsel_list);
42202dd5 1485 return status;
ddcacfa0 1486}