]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - Documentation/perf_counter/builtin-record.c
perf stat: Remove unused variable
[mirror_ubuntu-bionic-kernel.git] / Documentation / perf_counter / builtin-record.c
CommitLineData
de9ac07b
PZ
1
2
#include "perf.h"
#include "util/util.h"

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <getopt.h>
#include <assert.h>
#include <signal.h>
#include <fcntl.h>
#include <stdio.h>
#include <errno.h>
#include <time.h>
#include <sched.h>
#include <pthread.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

33
de9ac07b 34
de9ac07b 35
1a853e36
ACM
/*
 * ALIGN(x, a): round x up to the next multiple of a.
 *
 * The result is only meaningful when a is a power of two, because the
 * rounding is done with a bit mask (a - 1).  The mask is converted to the
 * type of x first so that the complement in __ALIGN_MASK() is computed at
 * the width of x.
 *
 * __typeof__ is used instead of typeof so the macro also works when the
 * compiler runs in a strict ISO mode (e.g. -std=c11), where the plain
 * typeof keyword is not recognised.
 *
 * Both macros evaluate their arguments more than once; do not pass
 * expressions with side effects.
 */
#define ALIGN(x, a)		__ALIGN_MASK((x), (__typeof__(x))(a) - 1)
#define __ALIGN_MASK(x, mask)	(((x) + (mask)) & ~(mask))
38
de9ac07b
PZ
39static int nr_counters = 0;
40static __u64 event_id[MAX_COUNTERS] = { };
41static int default_interval = 100000;
42static int event_count[MAX_COUNTERS];
43static int fd[MAX_NR_CPUS][MAX_COUNTERS];
44static int nr_cpus = 0;
45static unsigned int page_size;
46static unsigned int mmap_pages = 16;
47static int output;
48static char *output_name = "output.perf";
49static int group = 0;
16c8a109
PZ
50static unsigned int realtime_prio = 0;
51static int system_wide = 0;
1a853e36 52static pid_t target_pid = -1;
16c8a109
PZ
53static int inherit = 1;
54static int nmi = 1;
de9ac07b
PZ
55
56const unsigned int default_count[] = {
57 1000000,
58 1000000,
59 10000,
60 10000,
61 1000000,
62 10000,
63};
64
de9ac07b
PZ
65struct event_symbol {
66 __u64 event;
67 char *symbol;
68};
69
70static struct event_symbol event_symbols[] = {
71 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
72 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
73 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
74 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
75 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
76 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
77 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
78 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
79 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
80
81 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
82 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
83 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
84 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
85 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
86 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
87 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
88 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
89 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
90 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
91};
92
93/*
94 * Each event can have multiple symbolic names.
95 * Symbolic names are (almost) exactly matched.
96 */
97static __u64 match_event_symbols(char *str)
98{
99 __u64 config, id;
100 int type;
101 unsigned int i;
102
103 if (sscanf(str, "r%llx", &config) == 1)
104 return config | PERF_COUNTER_RAW_MASK;
105
106 if (sscanf(str, "%d:%llu", &type, &id) == 2)
107 return EID(type, id);
108
109 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
110 if (!strncmp(str, event_symbols[i].symbol,
111 strlen(event_symbols[i].symbol)))
112 return event_symbols[i].event;
113 }
114
115 return ~0ULL;
116}
117
118static int parse_events(char *str)
119{
120 __u64 config;
121
122again:
123 if (nr_counters == MAX_COUNTERS)
124 return -1;
125
126 config = match_event_symbols(str);
127 if (config == ~0ULL)
128 return -1;
129
130 event_id[nr_counters] = config;
131 nr_counters++;
132
133 str = strstr(str, ",");
134 if (str) {
135 str++;
136 goto again;
137 }
138
139 return 0;
140}
141
/*
 * Extract one bit field from an encoded event config.
 *
 * `name` selects the PERF_COUNTER_<name>_MASK / PERF_COUNTER_<name>_SHIFT
 * pair to apply.  The config argument is parenthesized so that an
 * expression argument (e.g. a | b) is masked as a whole rather than being
 * split by operator precedence.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
149
150static void display_events_help(void)
151{
152 unsigned int i;
153 __u64 e;
154
155 printf(
156 " -e EVENT --event=EVENT # symbolic-name abbreviations");
157
158 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
159 int type, id;
160
161 e = event_symbols[i].event;
162 type = PERF_COUNTER_TYPE(e);
163 id = PERF_COUNTER_ID(e);
164
165 printf("\n %d:%d: %-20s",
166 type, id, event_symbols[i].symbol);
167 }
168
169 printf("\n"
170 " rNNN: raw PMU events (eventsel+umask)\n\n");
171}
172
173static void display_help(void)
174{
175 printf(
16c8a109 176 "Usage: perf-record [<options>] <cmd>\n"
de9ac07b
PZ
177 "perf-record Options (up to %d event types can be specified at once):\n\n",
178 MAX_COUNTERS);
179
180 display_events_help();
181
182 printf(
183 " -c CNT --count=CNT # event period to sample\n"
184 " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
185 " -o file --output=<file> # output file\n"
1a853e36 186 " -p pid --pid=<pid> # record events on existing pid\n"
de9ac07b 187 " -r prio --realtime=<prio> # use RT prio\n"
16c8a109 188 " -s --system # system wide profiling\n"
de9ac07b
PZ
189 );
190
191 exit(0);
192}
193
69aa48ab 194static void process_options(int argc, char * const argv[])
de9ac07b
PZ
195{
196 int error = 0, counter;
197
198 for (;;) {
199 int option_index = 0;
200 /** Options for getopt */
201 static struct option long_options[] = {
202 {"count", required_argument, NULL, 'c'},
203 {"event", required_argument, NULL, 'e'},
204 {"mmap_pages", required_argument, NULL, 'm'},
205 {"output", required_argument, NULL, 'o'},
1a853e36 206 {"pid", required_argument, NULL, 'p'},
de9ac07b 207 {"realtime", required_argument, NULL, 'r'},
16c8a109
PZ
208 {"system", no_argument, NULL, 's'},
209 {"inherit", no_argument, NULL, 'i'},
210 {"nmi", no_argument, NULL, 'n'},
de9ac07b
PZ
211 {NULL, 0, NULL, 0 }
212 };
1a853e36 213 int c = getopt_long(argc, argv, "+:c:e:m:o:p:r:sin",
de9ac07b
PZ
214 long_options, &option_index);
215 if (c == -1)
216 break;
217
218 switch (c) {
219 case 'c': default_interval = atoi(optarg); break;
220 case 'e': error = parse_events(optarg); break;
221 case 'm': mmap_pages = atoi(optarg); break;
222 case 'o': output_name = strdup(optarg); break;
1a853e36 223 case 'p': target_pid = atoi(optarg); break;
de9ac07b 224 case 'r': realtime_prio = atoi(optarg); break;
16c8a109
PZ
225 case 's': system_wide ^= 1; break;
226 case 'i': inherit ^= 1; break;
227 case 'n': nmi ^= 1; break;
de9ac07b
PZ
228 default: error = 1; break;
229 }
230 }
16c8a109 231
1a853e36 232 if (argc - optind == 0 && target_pid == -1)
16c8a109
PZ
233 error = 1;
234
de9ac07b
PZ
235 if (error)
236 display_help();
237
238 if (!nr_counters) {
239 nr_counters = 1;
240 event_id[0] = 0;
241 }
242
243 for (counter = 0; counter < nr_counters; counter++) {
244 if (event_count[counter])
245 continue;
246
247 event_count[counter] = default_interval;
248 }
249}
250
251struct mmap_data {
252 int counter;
253 void *base;
254 unsigned int mask;
255 unsigned int prev;
256};
257
258static unsigned int mmap_read_head(struct mmap_data *md)
259{
260 struct perf_counter_mmap_page *pc = md->base;
261 int head;
262
263 head = pc->data_head;
264 rmb();
265
266 return head;
267}
268
269static long events;
270static struct timeval last_read, this_read;
271
272static void mmap_read(struct mmap_data *md)
273{
274 unsigned int head = mmap_read_head(md);
275 unsigned int old = md->prev;
276 unsigned char *data = md->base + page_size;
277 unsigned long size;
278 void *buf;
279 int diff;
280
281 gettimeofday(&this_read, NULL);
282
283 /*
284 * If we're further behind than half the buffer, there's a chance
285 * the writer will bite our tail and screw up the events under us.
286 *
287 * If we somehow ended up ahead of the head, we got messed up.
288 *
289 * In either case, truncate and restart at head.
290 */
291 diff = head - old;
292 if (diff > md->mask / 2 || diff < 0) {
293 struct timeval iv;
294 unsigned long msecs;
295
296 timersub(&this_read, &last_read, &iv);
297 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
298
299 fprintf(stderr, "WARNING: failed to keep up with mmap data."
300 " Last read %lu msecs ago.\n", msecs);
301
302 /*
303 * head points to a known good entry, start there.
304 */
305 old = head;
306 }
307
308 last_read = this_read;
309
310 if (old != head)
311 events++;
312
313 size = head - old;
314
315 if ((old & md->mask) + size != (head & md->mask)) {
316 buf = &data[old & md->mask];
317 size = md->mask + 1 - (old & md->mask);
318 old += size;
319 while (size) {
320 int ret = write(output, buf, size);
321 if (ret < 0) {
322 perror("failed to write");
323 exit(-1);
324 }
325 size -= ret;
326 buf += ret;
327 }
328 }
329
330 buf = &data[old & md->mask];
331 size = head - old;
332 old += size;
333 while (size) {
334 int ret = write(output, buf, size);
335 if (ret < 0) {
336 perror("failed to write");
337 exit(-1);
338 }
339 size -= ret;
340 buf += ret;
341 }
342
343 md->prev = old;
344}
345
/*
 * Set to 1 by sig_handler() to ask the main loop to stop.
 *
 * volatile sig_atomic_t is the object type the C standard guarantees can
 * be written safely from an asynchronous signal handler.
 */
static volatile sig_atomic_t done = 0;

/*
 * Signal handler: only records that a signal arrived.  The signal number
 * is not needed.
 */
static void sig_handler(int sig)
{
	(void)sig;
	done = 1;
}
352
16c8a109
PZ
353static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
354static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
355
356static int nr_poll;
357static int nr_cpu;
358
1a853e36
ACM
359struct mmap_event {
360 struct perf_event_header header;
361 __u32 pid, tid;
362 __u64 start;
363 __u64 len;
364 __u64 pgoff;
365 char filename[PATH_MAX];
366};
367struct comm_event {
368 struct perf_event_header header;
369 __u32 pid,tid;
370 char comm[16];
371};
372
373static pid_t pid_synthesize_comm_event(pid_t pid)
374{
375 char filename[PATH_MAX];
376 char bf[BUFSIZ];
377 struct comm_event comm_ev;
378 size_t size;
379 int fd;
380
381 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
382
383 fd = open(filename, O_RDONLY);
384 if (fd < 0) {
385 fprintf(stderr, "couldn't open %s\n", filename);
386 exit(EXIT_FAILURE);
387 }
388 if (read(fd, bf, sizeof(bf)) < 0) {
389 fprintf(stderr, "couldn't read %s\n", filename);
390 exit(EXIT_FAILURE);
391 }
392 close(fd);
393
394 pid_t spid, ppid;
395 char state;
396 char comm[18];
397
398 memset(&comm_ev, 0, sizeof(comm_ev));
399 int nr = sscanf(bf, "%d %s %c %d %d ",
400 &spid, comm, &state, &ppid, &comm_ev.pid);
401 if (nr != 5) {
402 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
403 filename);
404 exit(EXIT_FAILURE);
405 }
406 comm_ev.header.type = PERF_EVENT_COMM;
407 comm_ev.tid = pid;
408 size = strlen(comm);
409 comm[--size] = '\0'; /* Remove the ')' at the end */
410 --size; /* Remove the '(' at the begin */
411 memcpy(comm_ev.comm, comm + 1, size);
412 size = ALIGN(size, sizeof(uint64_t));
413 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
414 int ret = write(output, &comm_ev, comm_ev.header.size);
415 if (ret < 0) {
416 perror("failed to write");
417 exit(-1);
418 }
419 return comm_ev.pid;
420}
421
422static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
423{
424 char filename[PATH_MAX];
425 FILE *fp;
426
427 snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
428
429 fp = fopen(filename, "r");
430 if (fp == NULL) {
431 fprintf(stderr, "couldn't open %s\n", filename);
432 exit(EXIT_FAILURE);
433 }
434 while (1) {
435 char bf[BUFSIZ];
436 unsigned char vm_read, vm_write, vm_exec, vm_mayshare;
437 struct mmap_event mmap_ev = {
438 .header.type = PERF_EVENT_MMAP,
439 };
440 unsigned long ino;
441 int major, minor;
442 size_t size;
443 if (fgets(bf, sizeof(bf), fp) == NULL)
444 break;
445
446 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
447 sscanf(bf, "%llx-%llx %c%c%c%c %llx %x:%x %lu",
448 &mmap_ev.start, &mmap_ev.len,
449 &vm_read, &vm_write, &vm_exec, &vm_mayshare,
450 &mmap_ev.pgoff, &major, &minor, &ino);
451 if (vm_exec == 'x') {
452 char *execname = strrchr(bf, ' ');
453
454 if (execname == NULL || execname[1] != '/')
455 continue;
456
457 execname += 1;
458 size = strlen(execname);
459 execname[size - 1] = '\0'; /* Remove \n */
460 memcpy(mmap_ev.filename, execname, size);
461 size = ALIGN(size, sizeof(uint64_t));
462 mmap_ev.len -= mmap_ev.start;
463 mmap_ev.header.size = (sizeof(mmap_ev) -
464 (sizeof(mmap_ev.filename) - size));
465 mmap_ev.pid = pgid;
466 mmap_ev.tid = pid;
467
468 if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
469 perror("failed to write");
470 exit(-1);
471 }
472 }
473 }
474
475 fclose(fp);
476}
477
478static void open_counters(int cpu, pid_t pid)
de9ac07b 479{
de9ac07b 480 struct perf_counter_hw_event hw_event;
16c8a109
PZ
481 int counter, group_fd;
482 int track = 1;
16c8a109 483
1a853e36
ACM
484 if (pid > 0) {
485 pid_t pgid = pid_synthesize_comm_event(pid);
486 pid_synthesize_mmap_events(pid, pgid);
487 }
16c8a109
PZ
488
489 group_fd = -1;
490 for (counter = 0; counter < nr_counters; counter++) {
491
492 memset(&hw_event, 0, sizeof(hw_event));
493 hw_event.config = event_id[counter];
494 hw_event.irq_period = event_count[counter];
495 hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID;
496 hw_event.nmi = nmi;
497 hw_event.mmap = track;
498 hw_event.comm = track;
499 hw_event.inherit = (cpu < 0) && inherit;
500
501 track = 0; // only the first counter needs these
502
503 fd[nr_cpu][counter] =
504 sys_perf_counter_open(&hw_event, pid, cpu, group_fd, 0);
505
506 if (fd[nr_cpu][counter] < 0) {
507 int err = errno;
508 printf("kerneltop error: syscall returned with %d (%s)\n",
509 fd[nr_cpu][counter], strerror(err));
510 if (err == EPERM)
511 printf("Are you root?\n");
512 exit(-1);
513 }
514 assert(fd[nr_cpu][counter] >= 0);
515 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
516
517 /*
518 * First counter acts as the group leader:
519 */
520 if (group && group_fd == -1)
521 group_fd = fd[nr_cpu][counter];
522
523 event_array[nr_poll].fd = fd[nr_cpu][counter];
524 event_array[nr_poll].events = POLLIN;
525 nr_poll++;
526
527 mmap_array[nr_cpu][counter].counter = counter;
528 mmap_array[nr_cpu][counter].prev = 0;
529 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
530 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
531 PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
532 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
533 printf("kerneltop error: failed to mmap with %d (%s)\n",
534 errno, strerror(errno));
535 exit(-1);
536 }
537 }
538 nr_cpu++;
539}
540
69aa48ab 541int cmd_record(int argc, char * const argv[])
16c8a109
PZ
542{
543 int i, counter;
de9ac07b
PZ
544 pid_t pid;
545 int ret;
546
547 page_size = sysconf(_SC_PAGE_SIZE);
548
549 process_options(argc, argv);
550
551 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
552 assert(nr_cpus <= MAX_NR_CPUS);
553 assert(nr_cpus >= 0);
554
555 output = open(output_name, O_CREAT|O_RDWR, S_IRWXU);
556 if (output < 0) {
557 perror("failed to create output file");
558 exit(-1);
559 }
560
561 argc -= optind;
562 argv += optind;
563
1a853e36
ACM
564 if (!system_wide) {
565 open_counters(-1, target_pid != -1 ? target_pid : 0);
566 } else for (i = 0; i < nr_cpus; i++)
567 open_counters(i, target_pid);
de9ac07b 568
16c8a109
PZ
569 signal(SIGCHLD, sig_handler);
570 signal(SIGINT, sig_handler);
de9ac07b 571
1a853e36
ACM
572 if (target_pid == -1) {
573 pid = fork();
574 if (pid < 0)
575 perror("failed to fork");
de9ac07b 576
1a853e36
ACM
577 if (!pid) {
578 if (execvp(argv[0], argv)) {
579 perror(argv[0]);
580 exit(-1);
581 }
de9ac07b
PZ
582 }
583 }
584
585 if (realtime_prio) {
586 struct sched_param param;
587
588 param.sched_priority = realtime_prio;
589 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
590 printf("Could not set realtime priority.\n");
591 exit(-1);
592 }
593 }
594
595 /*
596 * TODO: store the current /proc/$/maps information somewhere
597 */
598
599 while (!done) {
600 int hits = events;
601
16c8a109 602 for (i = 0; i < nr_cpu; i++) {
de9ac07b
PZ
603 for (counter = 0; counter < nr_counters; counter++)
604 mmap_read(&mmap_array[i][counter]);
605 }
606
607 if (hits == events)
608 ret = poll(event_array, nr_poll, 100);
609 }
610
611 return 0;
612}