/*
 * perf stat: a /usr/bin/time-like performance counter statistics utility.
 *
 * It summarizes the counter events of all tasks (and child tasks),
 * covering all CPUs that the command (or workload) executes on.
 * It only counts the per-task events of the workload started,
 * independent of how many other tasks run on those CPUs.
 *
 *   $ perf stat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
 *
 *   Performance counter stats for 'ls':
 *
 *       163516953 instructions
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
34 #include "util/util.h"
35 #include "util/parse-options.h"
36 #include "util/parse-events.h"
38 #include <sys/prctl.h>
/*
 * Flag bound to the -a/--all-cpus option ("system-wide collection from
 * all CPUs"). Off by default.
 */
40 static int system_wide
= 0;
/*
 * Flag bound to the -i/--inherit option ("child tasks inherit counters").
 * On by default; create_perfstat_counter() copies it into hw_event.inherit.
 */
41 static int inherit
= 1;
/*
 * Default event selection: four software events (task clock, context
 * switches, CPU migrations, page faults) followed by four hardware events
 * (cycles, instructions, cache references, cache misses).
 * cmd_stat() copies this table into event_id[] before parsing options.
 *
 * NOTE(review): the closing "};" of this initializer is not present in
 * this listing (the embedded line numbers jump from 52 to 55).
 */
43 static __u64 default_event_id
[MAX_COUNTERS
] = {
44 EID(PERF_TYPE_SOFTWARE
, PERF_COUNT_TASK_CLOCK
),
45 EID(PERF_TYPE_SOFTWARE
, PERF_COUNT_CONTEXT_SWITCHES
),
46 EID(PERF_TYPE_SOFTWARE
, PERF_COUNT_CPU_MIGRATIONS
),
47 EID(PERF_TYPE_SOFTWARE
, PERF_COUNT_PAGE_FAULTS
),
49 EID(PERF_TYPE_HARDWARE
, PERF_COUNT_CPU_CYCLES
),
50 EID(PERF_TYPE_HARDWARE
, PERF_COUNT_INSTRUCTIONS
),
51 EID(PERF_TYPE_HARDWARE
, PERF_COUNT_CACHE_REFERENCES
),
52 EID(PERF_TYPE_HARDWARE
, PERF_COUNT_CACHE_MISSES
),
/*
 * Value bound to the -c/--count option ("event period to sample").
 * cmd_stat() stores it into event_count[] entries.
 */
55 static int default_interval
= 100000;
/* Per-counter value, filled from default_interval in cmd_stat(). */
56 static int event_count
[MAX_COUNTERS
];
/*
 * File descriptors returned by sys_perf_counter_open(), indexed
 * [cpu][counter]. The non-per-CPU open in create_perfstat_counter()
 * uses row 0 only.
 */
57 static int fd
[MAX_NR_CPUS
][MAX_COUNTERS
];
/*
 * Value bound to the -p/--pid option ("stat events on existing pid").
 * Not read anywhere in the visible part of this file.
 */
59 static int target_pid
= -1;
/* Set in cmd_stat() from sysconf(_SC_NPROCESSORS_ONLN). */
60 static int nr_cpus
= 0;
/* Set in cmd_stat() from sysconf(_SC_PAGE_SIZE). */
61 static unsigned int page_size
;
/*
 * NOTE(review): the initializer contents and closing brace of this table
 * are not present in this listing (embedded line numbers jump from 65 to
 * 74), and no visible code reads it; its purpose cannot be confirmed here.
 */
65 static const unsigned int default_count
[] = {
/*
 * Per-counter results accumulated by read_counter():
 *   [0] the event count (summed over CPUs, possibly rescaled),
 *   [1] and [2] the two extra values read alongside it. print_counter()
 *       reports [2] / [1] * 100 as the "scaled from" percentage.
 *       Presumably [1] is total time enabled and [2] total time running,
 *       matching the read_format bits requested - TODO confirm.
 */
74 static __u64 event_res
[MAX_COUNTERS
][3];
/*
 * Per-counter scaling marker written by read_counter(): -1 or 1 in the
 * visible code. Note the element type is unsigned, so -1 is stored as the
 * all-ones value.
 */
75 static __u64 event_scaled
[MAX_COUNTERS
];
/* Task-clock count saved by read_counter(); divisor for the M/sec column. */
77 static __u64 runtime_nsecs
;
/* t1 - t0 as computed in do_perfstat(); divisor for CPU utilization. */
78 static __u64 walltime_nsecs
;
/*
 * Open the perf counter for event slot 'counter' and store the resulting
 * file descriptor(s) in fd[][counter].
 *
 * NOTE(review): this listing is a lossy extraction. Embedded original
 * lines 81, 83, 89-90, 93-95, 101-104, 107 and 112-117 are missing, which
 * includes braces, the declaration of 'cpu', whatever conditions select
 * between the per-CPU loop and the single open below, and the statements
 * that follow each error message. Do not infer the branch structure from
 * this text alone.
 */
80 static void create_perfstat_counter(int counter
)
82 struct perf_counter_hw_event hw_event
;
/* Start from an all-zero descriptor, then select the event. */
84 memset(&hw_event
, 0, sizeof(hw_event
));
85 hw_event
.config
= event_id
[counter
];
86 hw_event
.record_type
= 0;
/* Exclusion flags come from the per-counter event_mask bits. */
87 hw_event
.exclude_kernel
= event_mask
[counter
] & EVENT_MASK_KERNEL
;
88 hw_event
.exclude_user
= event_mask
[counter
] & EVENT_MASK_USER
;
/*
 * Request the total-time-enabled and total-time-running values in
 * addition to the count. NOTE(review): a guarding condition may precede
 * this assignment (embedded lines 89-90 are missing).
 */
91 hw_event
.read_format
= PERF_FORMAT_TOTAL_TIME_ENABLED
|
92 PERF_FORMAT_TOTAL_TIME_RUNNING
;
/*
 * One open per CPU index in [0, nr_cpus), passing -1 as the second
 * argument and the CPU index as the third - presumably (pid, cpu);
 * confirm against the declaration of sys_perf_counter_open().
 */
96 for (cpu
= 0; cpu
< nr_cpus
; cpu
++) {
97 fd
[cpu
][counter
] = sys_perf_counter_open(&hw_event
, -1, cpu
, -1, 0);
/* A negative return is reported on stdout with the errno text. */
98 if (fd
[cpu
][counter
] < 0) {
99 printf("perfstat error: syscall returned with %d (%s)\n",
100 fd
[cpu
][counter
], strerror(errno
));
/*
 * Single open stored in fd[0][counter], with second argument 0 and third
 * argument -1. The counter is created disabled (disabled = 1);
 * do_perfstat() later calls prctl(PR_TASK_PERF_COUNTERS_ENABLE).
 */
105 hw_event
.inherit
= inherit
;
106 hw_event
.disabled
= 1;
108 fd
[0][counter
] = sys_perf_counter_open(&hw_event
, 0, -1, -1, 0);
109 if (fd
[0][counter
] < 0) {
110 printf("perfstat error: syscall returned with %d (%s)\n",
111 fd
[0][counter
], strerror(errno
));
/* Does the counter have nsecs as a unit? */
/*
 * Tests event_id[counter] against the two software clock events
 * (CPU_CLOCK and TASK_CLOCK). print_counter() prints the value as
 * milliseconds when this function's result is non-zero.
 *
 * NOTE(review): the return statements are missing from this listing
 * (embedded lines 121, 123 and 125+ are absent).
 */
120 static inline int nsec_counter(int counter
)
122 if (event_id
[counter
] == EID(PERF_TYPE_SOFTWARE
, PERF_COUNT_CPU_CLOCK
))
124 if (event_id
[counter
] == EID(PERF_TYPE_SOFTWARE
, PERF_COUNT_TASK_CLOCK
))
/* Read out the results of a single counter: */
/*
 * Read the file descriptors of one counter, sum the values into
 * event_res[counter], optionally rescale the count, and record the
 * task-clock total in runtime_nsecs.
 *
 * NOTE(review): this listing is a lossy extraction. The declarations of
 * 'cpu', 'res' and 'nv', the value given to 'nv', and several conditions
 * (embedded lines 136-139, 148, 150, 153-158, 160-163) are missing.
 */
133 static void read_counter(int counter
)
135 __u64
*count
, single_count
[3];
/* Accumulate directly into the global result slot, starting from zero. */
140 count
= event_res
[counter
];
142 count
[0] = count
[1] = count
[2] = 0;
/* Each read must return exactly nv 64-bit values or the assert fires. */
145 for (cpu
= 0; cpu
< nr_cpus
; cpu
++) {
146 res
= read(fd
[cpu
][counter
], single_count
, nv
* sizeof(__u64
));
147 assert(res
== nv
* sizeof(__u64
));
149 count
[0] += single_count
[0];
/*
 * NOTE(review): a condition may guard the next two sums (embedded line
 * 150 is missing). If nv can be less than 3, single_count[1] and
 * single_count[2] are not written by the read above.
 */
151 count
[1] += single_count
[1];
152 count
[2] += single_count
[2];
/*
 * NOTE(review): the condition under which the counter is marked -1 is
 * not visible here (embedded lines 153-158 and 160-163 are missing).
 */
159 event_scaled
[counter
] = -1;
/*
 * When count[2] is smaller than count[1], mark the counter as scaled and
 * extrapolate the count by count[1] / count[2], rounding to nearest.
 */
164 if (count
[2] < count
[1]) {
165 event_scaled
[counter
] = 1;
166 count
[0] = (unsigned long long)
167 ((double)count
[0] * count
[1] / count
[2] + 0.5);
171 * Save the full runtime - to allow normalization during printout:
173 if (event_id
[counter
] == EID(PERF_TYPE_SOFTWARE
, PERF_COUNT_TASK_CLOCK
))
174 runtime_nsecs
= count
[0];
/* Print out the results of a single counter: */
/*
 * Print one counter's result line to stderr, using the values that
 * read_counter() stored in event_res[counter] and event_scaled[counter].
 *
 * NOTE(review): this listing is a lossy extraction. The declarations of
 * 'count' and 'scaled' and the conditions selecting between the outputs
 * below (embedded lines 181-184, 187-188, 191-193, 201, 204-205, 208,
 * 211-212) are missing.
 */
180 static void print_counter(int counter
)
185 count
= event_res
[counter
];
186 scaled
= event_scaled
[counter
];
/* Placeholder line; the condition that triggers it is not visible. */
189 fprintf(stderr
, " %14s %-20s\n",
190 "<not counted>", event_name(counter
));
/* Clock-type counters hold nanoseconds and are shown as milliseconds. */
194 if (nsec_counter(counter
)) {
195 double msecs
= (double)count
[0] / 1000000;
197 fprintf(stderr
, " %14.6f %-20s",
198 msecs
, event_name(counter
));
/* For the task clock, also show task time relative to wall-clock time. */
199 if (event_id
[counter
] ==
200 EID(PERF_TYPE_SOFTWARE
, PERF_COUNT_TASK_CLOCK
)) {
202 fprintf(stderr
, " # %11.3f CPU utilization factor",
203 (double)count
[0] / (double)walltime_nsecs
);
/*
 * Plain event count.
 * NOTE(review): "%Ld" pairs the 'L' length modifier with an integer
 * conversion, which ISO C does not define (glibc accepts it as "ll");
 * the value comes from an unsigned __u64 array - confirm and consider
 * "%llu" with a cast.
 */
206 fprintf(stderr
, " %14Ld %-20s",
207 count
[0], event_name(counter
));
/*
 * Events per nanosecond of task clock times 1000, i.e. millions per second.
 * NOTE(review): runtime_nsecs is only assigned when a task-clock counter
 * is read; any guard against it being zero would be on a line missing
 * from this listing (embedded line 208).
 */
209 fprintf(stderr
, " # %11.3f M/sec",
210 (double)count
[0]/runtime_nsecs
*1000.0);
/* Percentage given by count[2] / count[1]. */
213 fprintf(stderr
, " (scaled from %.2f%%)",
214 (double) count
[2] / count
[1] * 100);
215 fprintf(stderr
, "\n");
/*
 * Create all counters, run the command given in argv in a forked child,
 * wait for it, then read and print every counter plus the elapsed
 * wall-clock time on stderr.
 *
 * NOTE(review): this listing is a lossy extraction. The declarations of
 * 'counter', 'pid' and 'status', the assignments to t0 and t1, the
 * parent/child branch after fork(), the handling of an execvp() failure,
 * and the return statement are on lines that are missing here.
 */
218 static int do_perfstat(int argc
, const char **argv
)
220 unsigned long long t0
, t1
;
/* Open every configured counter before starting the workload. */
228 for (counter
= 0; counter
< nr_counters
; counter
++)
229 create_perfstat_counter(counter
);
232 * Enable counters and exec the command:
235 prctl(PR_TASK_PERF_COUNTERS_ENABLE
);
/* A fork failure is reported with perror(). */
237 if ((pid
= fork()) < 0)
238 perror("failed to fork");
/* execvp() only returns on failure, so this body is the error path. */
240 if (execvp(argv
[0], (char **)argv
)) {
/* Reap children until wait() reports an error (no children left). */
245 while (wait(&status
) >= 0)
247 prctl(PR_TASK_PERF_COUNTERS_DISABLE
);
/* Elapsed time between the two timestamps; unit presumably nanoseconds. */
250 walltime_nsecs
= t1
- t0
;
254 fprintf(stderr
, "\n");
/* NOTE(review): the argument for '%s' is on a line missing here. */
255 fprintf(stderr
, " Performance counter stats for \'%s\':\n",
257 fprintf(stderr
, "\n");
/*
 * Read all counters first so that runtime_nsecs is set before any line
 * is printed.
 */
259 for (counter
= 0; counter
< nr_counters
; counter
++)
260 read_counter(counter
);
262 for (counter
= 0; counter
< nr_counters
; counter
++)
263 print_counter(counter
);
266 fprintf(stderr
, "\n");
/* t1 - t0 divided by 1e6 is printed as milliseconds. */
267 fprintf(stderr
, " Wall-clock time elapsed: %12.6f msecs\n",
268 (double)(t1
-t0
)/1e6
);
269 fprintf(stderr
, "\n");
/*
 * Signal handler that cmd_stat() installs for SIGINT, SIGALRM and SIGABRT.
 * NOTE(review): the body is missing from this listing; presumably it is
 * empty so that the signals are ignored by perf stat itself - confirm.
 */
274 static void skip_signal(int signo
)
/*
 * Usage strings passed to parse_options() and usage_with_options().
 * NOTE(review): the terminating entry and closing brace are missing from
 * this listing.
 */
278 static const char * const stat_usage
[] = {
279 "perf stat [<options>] <command>",
/*
 * Help text for the -e/--event option; filled by create_events_help()
 * at the start of cmd_stat().
 */
283 static char events_help_msg
[EVENTS_HELP_MAX
];
/*
 * Command-line option table for 'perf stat', consumed by parse_options().
 * NOTE(review): the table terminator and closing brace are missing from
 * this listing, as is the declaration of 'scale' referenced by -l.
 */
285 static const struct option options
[] = {
/* -e/--event: handled by the parse_events callback. */
286 OPT_CALLBACK('e', "event", NULL
, "event",
287 events_help_msg
, parse_events
),
/* -c/--count: stored in default_interval. */
288 OPT_INTEGER('c', "count", &default_interval
,
289 "event period to sample"),
/* -i/--inherit: stored in inherit. */
290 OPT_BOOLEAN('i', "inherit", &inherit
,
291 "child tasks inherit counters"),
/* -p/--pid: stored in target_pid. */
292 OPT_INTEGER('p', "pid", &target_pid
,
293 "stat events on existing pid"),
/* -a/--all-cpus: stored in system_wide. */
294 OPT_BOOLEAN('a', "all-cpus", &system_wide
,
295 "system-wide collection from all CPUs"),
/* -l/--scale: stored in scale. */
296 OPT_BOOLEAN('l', "scale", &scale
,
297 "scale/normalize counters"),
/*
 * Entry point of the 'perf stat' subcommand: installs the default event
 * selection, parses the command line, determines the CPU count, installs
 * signal handlers and hands the remaining arguments to do_perfstat().
 *
 * Returns the value returned by do_perfstat().
 *
 * NOTE(review): this listing is a lossy extraction. The declaration of
 * 'counter', the condition guarding usage_with_options(), and the
 * statements on embedded lines 313-317 and 320-321 are missing.
 */
301 int cmd_stat(int argc
, const char **argv
, const char *prefix
)
305 page_size
= sysconf(_SC_PAGE_SIZE
);
/* Build the -e help text, then preload the default events. */
307 create_events_help(events_help_msg
);
308 memcpy(event_id
, default_event_id
, sizeof(default_event_id
));
/* parse_options() returns the updated argument count. */
310 argc
= parse_options(argc
, argv
, options
, stat_usage
, 0);
/* Prints the usage text; the triggering condition is not visible. */
312 usage_with_options(stat_usage
, options
);
/*
 * Store default_interval into event_count[] entries. The statement
 * controlled by the 'if' below is missing from this listing, so which
 * entries are overwritten cannot be confirmed here.
 */
318 for (counter
= 0; counter
< nr_counters
; counter
++) {
319 if (event_count
[counter
])
322 event_count
[counter
] = default_interval
;
/* The online CPU count must fit the first dimension of fd[][]. */
324 nr_cpus
= sysconf(_SC_NPROCESSORS_ONLN
);
325 assert(nr_cpus
<= MAX_NR_CPUS
);
326 assert(nr_cpus
>= 0);
329 * We dont want to block the signals - that would cause
330 * child tasks to inherit that and Ctrl-C would not work.
331 * What we want is for Ctrl-C to work in the exec()-ed
332 * task, but being ignored by perf stat itself:
334 signal(SIGINT
, skip_signal
);
335 signal(SIGALRM
, skip_signal
);
336 signal(SIGABRT
, skip_signal
);
338 return do_perfstat(argc
, argv
);