]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - Documentation/perf_counter/builtin-record.c
perf_counter: Sanitize context locking
[mirror_ubuntu-artful-kernel.git] / Documentation / perf_counter / builtin-record.c
CommitLineData
de9ac07b
PZ
1
2
6eda5838
TG
3#include "util/util.h"
4
de9ac07b
PZ
5#include <sys/types.h>
6#include <sys/stat.h>
7#include <sys/time.h>
8#include <unistd.h>
9#include <stdint.h>
10#include <stdlib.h>
11#include <string.h>
12#include <limits.h>
13#include <getopt.h>
14#include <assert.h>
15#include <fcntl.h>
16#include <stdio.h>
17#include <errno.h>
de9ac07b
PZ
18#include <time.h>
19#include <sched.h>
20#include <pthread.h>
21
22#include <sys/syscall.h>
23#include <sys/ioctl.h>
24#include <sys/poll.h>
25#include <sys/prctl.h>
26#include <sys/wait.h>
27#include <sys/uio.h>
28#include <sys/mman.h>
29
30#include <linux/unistd.h>
31#include <linux/types.h>
32
33#include "../../include/linux/perf_counter.h"
34
6eda5838 35#include "perf.h"
de9ac07b 36
1a853e36
ACM
/*
 * ALIGN(x, a) rounds x up to the next multiple of a; a must be a power of
 * two for the mask arithmetic to be valid.
 *
 * __typeof__ is used rather than plain typeof so the macro also builds in
 * strict ISO modes (-std=c99/-std=c11), where the unprefixed GNU keyword is
 * not recognized.
 */
#define ALIGN(x,a)		__ALIGN_MASK(x,(__typeof__(x))(a)-1)
#define __ALIGN_MASK(x,mask)	(((x)+(mask))&~(mask))
39
de9ac07b
PZ
40static int nr_counters = 0;
41static __u64 event_id[MAX_COUNTERS] = { };
42static int default_interval = 100000;
43static int event_count[MAX_COUNTERS];
44static int fd[MAX_NR_CPUS][MAX_COUNTERS];
45static int nr_cpus = 0;
46static unsigned int page_size;
47static unsigned int mmap_pages = 16;
48static int output;
49static char *output_name = "output.perf";
50static int group = 0;
16c8a109
PZ
51static unsigned int realtime_prio = 0;
52static int system_wide = 0;
1a853e36 53static pid_t target_pid = -1;
16c8a109
PZ
54static int inherit = 1;
55static int nmi = 1;
de9ac07b
PZ
56
57const unsigned int default_count[] = {
58 1000000,
59 1000000,
60 10000,
61 10000,
62 1000000,
63 10000,
64};
65
de9ac07b
PZ
66struct event_symbol {
67 __u64 event;
68 char *symbol;
69};
70
71static struct event_symbol event_symbols[] = {
72 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
73 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
74 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
75 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
76 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
77 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
78 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
79 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
80 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
81
82 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
83 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
84 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
85 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
86 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
87 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
88 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
89 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
90 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
91 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
92};
93
94/*
95 * Each event can have multiple symbolic names.
96 * Symbolic names are (almost) exactly matched.
97 */
98static __u64 match_event_symbols(char *str)
99{
100 __u64 config, id;
101 int type;
102 unsigned int i;
103
104 if (sscanf(str, "r%llx", &config) == 1)
105 return config | PERF_COUNTER_RAW_MASK;
106
107 if (sscanf(str, "%d:%llu", &type, &id) == 2)
108 return EID(type, id);
109
110 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
111 if (!strncmp(str, event_symbols[i].symbol,
112 strlen(event_symbols[i].symbol)))
113 return event_symbols[i].event;
114 }
115
116 return ~0ULL;
117}
118
119static int parse_events(char *str)
120{
121 __u64 config;
122
123again:
124 if (nr_counters == MAX_COUNTERS)
125 return -1;
126
127 config = match_event_symbols(str);
128 if (config == ~0ULL)
129 return -1;
130
131 event_id[nr_counters] = config;
132 nr_counters++;
133
134 str = strstr(str, ",");
135 if (str) {
136 str++;
137 goto again;
138 }
139
140 return 0;
141}
142
/*
 * Extract one bit-field from a packed event config: mask it with
 * PERF_COUNTER_<name>_MASK and shift it down by PERF_COUNTER_<name>_SHIFT.
 *
 * The argument is parenthesized so that an expression such as `a | b`
 * is masked as a whole instead of only its last operand.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
150
151static void display_events_help(void)
152{
153 unsigned int i;
154 __u64 e;
155
156 printf(
157 " -e EVENT --event=EVENT # symbolic-name abbreviations");
158
159 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
160 int type, id;
161
162 e = event_symbols[i].event;
163 type = PERF_COUNTER_TYPE(e);
164 id = PERF_COUNTER_ID(e);
165
166 printf("\n %d:%d: %-20s",
167 type, id, event_symbols[i].symbol);
168 }
169
170 printf("\n"
171 " rNNN: raw PMU events (eventsel+umask)\n\n");
172}
173
174static void display_help(void)
175{
176 printf(
16c8a109 177 "Usage: perf-record [<options>] <cmd>\n"
de9ac07b
PZ
178 "perf-record Options (up to %d event types can be specified at once):\n\n",
179 MAX_COUNTERS);
180
181 display_events_help();
182
183 printf(
184 " -c CNT --count=CNT # event period to sample\n"
185 " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
186 " -o file --output=<file> # output file\n"
1a853e36 187 " -p pid --pid=<pid> # record events on existing pid\n"
de9ac07b 188 " -r prio --realtime=<prio> # use RT prio\n"
16c8a109 189 " -s --system # system wide profiling\n"
de9ac07b
PZ
190 );
191
192 exit(0);
193}
194
16c8a109 195static void process_options(int argc, const char *argv[])
de9ac07b
PZ
196{
197 int error = 0, counter;
198
199 for (;;) {
200 int option_index = 0;
201 /** Options for getopt */
202 static struct option long_options[] = {
203 {"count", required_argument, NULL, 'c'},
204 {"event", required_argument, NULL, 'e'},
205 {"mmap_pages", required_argument, NULL, 'm'},
206 {"output", required_argument, NULL, 'o'},
1a853e36 207 {"pid", required_argument, NULL, 'p'},
de9ac07b 208 {"realtime", required_argument, NULL, 'r'},
16c8a109
PZ
209 {"system", no_argument, NULL, 's'},
210 {"inherit", no_argument, NULL, 'i'},
211 {"nmi", no_argument, NULL, 'n'},
de9ac07b
PZ
212 {NULL, 0, NULL, 0 }
213 };
1a853e36 214 int c = getopt_long(argc, argv, "+:c:e:m:o:p:r:sin",
de9ac07b
PZ
215 long_options, &option_index);
216 if (c == -1)
217 break;
218
219 switch (c) {
220 case 'c': default_interval = atoi(optarg); break;
221 case 'e': error = parse_events(optarg); break;
222 case 'm': mmap_pages = atoi(optarg); break;
223 case 'o': output_name = strdup(optarg); break;
1a853e36 224 case 'p': target_pid = atoi(optarg); break;
de9ac07b 225 case 'r': realtime_prio = atoi(optarg); break;
16c8a109
PZ
226 case 's': system_wide ^= 1; break;
227 case 'i': inherit ^= 1; break;
228 case 'n': nmi ^= 1; break;
de9ac07b
PZ
229 default: error = 1; break;
230 }
231 }
16c8a109 232
1a853e36 233 if (argc - optind == 0 && target_pid == -1)
16c8a109
PZ
234 error = 1;
235
de9ac07b
PZ
236 if (error)
237 display_help();
238
239 if (!nr_counters) {
240 nr_counters = 1;
241 event_id[0] = 0;
242 }
243
244 for (counter = 0; counter < nr_counters; counter++) {
245 if (event_count[counter])
246 continue;
247
248 event_count[counter] = default_interval;
249 }
250}
251
/* Reader-side state of one counter's mmap()ed ring buffer. */
struct mmap_data {
	int		counter;	/* index of the counter this buffer belongs to */
	void		*base;		/* start of the mapping; data begins one page in */
	unsigned int	mask;		/* size of the data area minus one */
	unsigned int	prev;		/* head position consumed by the last read */
};
258
259static unsigned int mmap_read_head(struct mmap_data *md)
260{
261 struct perf_counter_mmap_page *pc = md->base;
262 int head;
263
264 head = pc->data_head;
265 rmb();
266
267 return head;
268}
269
270static long events;
271static struct timeval last_read, this_read;
272
273static void mmap_read(struct mmap_data *md)
274{
275 unsigned int head = mmap_read_head(md);
276 unsigned int old = md->prev;
277 unsigned char *data = md->base + page_size;
278 unsigned long size;
279 void *buf;
280 int diff;
281
282 gettimeofday(&this_read, NULL);
283
284 /*
285 * If we're further behind than half the buffer, there's a chance
286 * the writer will bite our tail and screw up the events under us.
287 *
288 * If we somehow ended up ahead of the head, we got messed up.
289 *
290 * In either case, truncate and restart at head.
291 */
292 diff = head - old;
293 if (diff > md->mask / 2 || diff < 0) {
294 struct timeval iv;
295 unsigned long msecs;
296
297 timersub(&this_read, &last_read, &iv);
298 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
299
300 fprintf(stderr, "WARNING: failed to keep up with mmap data."
301 " Last read %lu msecs ago.\n", msecs);
302
303 /*
304 * head points to a known good entry, start there.
305 */
306 old = head;
307 }
308
309 last_read = this_read;
310
311 if (old != head)
312 events++;
313
314 size = head - old;
315
316 if ((old & md->mask) + size != (head & md->mask)) {
317 buf = &data[old & md->mask];
318 size = md->mask + 1 - (old & md->mask);
319 old += size;
320 while (size) {
321 int ret = write(output, buf, size);
322 if (ret < 0) {
323 perror("failed to write");
324 exit(-1);
325 }
326 size -= ret;
327 buf += ret;
328 }
329 }
330
331 buf = &data[old & md->mask];
332 size = head - old;
333 old += size;
334 while (size) {
335 int ret = write(output, buf, size);
336 if (ret < 0) {
337 perror("failed to write");
338 exit(-1);
339 }
340 size -= ret;
341 buf += ret;
342 }
343
344 md->prev = old;
345}
346
/* Set once a handled signal arrives; polled by the main recording loop. */
static volatile int done;

/* Signal handler: request termination of the recording loop. */
static void sig_handler(int sig)
{
	(void)sig;	/* every handled signal is treated the same way */
	done = 1;
}
353
16c8a109
PZ
354static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
355static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
356
357static int nr_poll;
358static int nr_cpu;
359
1a853e36
ACM
360struct mmap_event {
361 struct perf_event_header header;
362 __u32 pid, tid;
363 __u64 start;
364 __u64 len;
365 __u64 pgoff;
366 char filename[PATH_MAX];
367};
368struct comm_event {
369 struct perf_event_header header;
370 __u32 pid,tid;
371 char comm[16];
372};
373
374static pid_t pid_synthesize_comm_event(pid_t pid)
375{
376 char filename[PATH_MAX];
377 char bf[BUFSIZ];
378 struct comm_event comm_ev;
379 size_t size;
380 int fd;
381
382 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
383
384 fd = open(filename, O_RDONLY);
385 if (fd < 0) {
386 fprintf(stderr, "couldn't open %s\n", filename);
387 exit(EXIT_FAILURE);
388 }
389 if (read(fd, bf, sizeof(bf)) < 0) {
390 fprintf(stderr, "couldn't read %s\n", filename);
391 exit(EXIT_FAILURE);
392 }
393 close(fd);
394
395 pid_t spid, ppid;
396 char state;
397 char comm[18];
398
399 memset(&comm_ev, 0, sizeof(comm_ev));
400 int nr = sscanf(bf, "%d %s %c %d %d ",
401 &spid, comm, &state, &ppid, &comm_ev.pid);
402 if (nr != 5) {
403 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
404 filename);
405 exit(EXIT_FAILURE);
406 }
407 comm_ev.header.type = PERF_EVENT_COMM;
408 comm_ev.tid = pid;
409 size = strlen(comm);
410 comm[--size] = '\0'; /* Remove the ')' at the end */
411 --size; /* Remove the '(' at the begin */
412 memcpy(comm_ev.comm, comm + 1, size);
413 size = ALIGN(size, sizeof(uint64_t));
414 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
415 int ret = write(output, &comm_ev, comm_ev.header.size);
416 if (ret < 0) {
417 perror("failed to write");
418 exit(-1);
419 }
420 return comm_ev.pid;
421}
422
423static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
424{
425 char filename[PATH_MAX];
426 FILE *fp;
427
428 snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
429
430 fp = fopen(filename, "r");
431 if (fp == NULL) {
432 fprintf(stderr, "couldn't open %s\n", filename);
433 exit(EXIT_FAILURE);
434 }
435 while (1) {
436 char bf[BUFSIZ];
437 unsigned char vm_read, vm_write, vm_exec, vm_mayshare;
438 struct mmap_event mmap_ev = {
439 .header.type = PERF_EVENT_MMAP,
440 };
441 unsigned long ino;
442 int major, minor;
443 size_t size;
444 if (fgets(bf, sizeof(bf), fp) == NULL)
445 break;
446
447 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
448 sscanf(bf, "%llx-%llx %c%c%c%c %llx %x:%x %lu",
449 &mmap_ev.start, &mmap_ev.len,
450 &vm_read, &vm_write, &vm_exec, &vm_mayshare,
451 &mmap_ev.pgoff, &major, &minor, &ino);
452 if (vm_exec == 'x') {
453 char *execname = strrchr(bf, ' ');
454
455 if (execname == NULL || execname[1] != '/')
456 continue;
457
458 execname += 1;
459 size = strlen(execname);
460 execname[size - 1] = '\0'; /* Remove \n */
461 memcpy(mmap_ev.filename, execname, size);
462 size = ALIGN(size, sizeof(uint64_t));
463 mmap_ev.len -= mmap_ev.start;
464 mmap_ev.header.size = (sizeof(mmap_ev) -
465 (sizeof(mmap_ev.filename) - size));
466 mmap_ev.pid = pgid;
467 mmap_ev.tid = pid;
468
469 if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
470 perror("failed to write");
471 exit(-1);
472 }
473 }
474 }
475
476 fclose(fp);
477}
478
479static void open_counters(int cpu, pid_t pid)
de9ac07b 480{
de9ac07b 481 struct perf_counter_hw_event hw_event;
16c8a109
PZ
482 int counter, group_fd;
483 int track = 1;
16c8a109 484
1a853e36
ACM
485 if (pid > 0) {
486 pid_t pgid = pid_synthesize_comm_event(pid);
487 pid_synthesize_mmap_events(pid, pgid);
488 }
16c8a109
PZ
489
490 group_fd = -1;
491 for (counter = 0; counter < nr_counters; counter++) {
492
493 memset(&hw_event, 0, sizeof(hw_event));
494 hw_event.config = event_id[counter];
495 hw_event.irq_period = event_count[counter];
496 hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID;
497 hw_event.nmi = nmi;
498 hw_event.mmap = track;
499 hw_event.comm = track;
500 hw_event.inherit = (cpu < 0) && inherit;
501
502 track = 0; // only the first counter needs these
503
504 fd[nr_cpu][counter] =
505 sys_perf_counter_open(&hw_event, pid, cpu, group_fd, 0);
506
507 if (fd[nr_cpu][counter] < 0) {
508 int err = errno;
509 printf("kerneltop error: syscall returned with %d (%s)\n",
510 fd[nr_cpu][counter], strerror(err));
511 if (err == EPERM)
512 printf("Are you root?\n");
513 exit(-1);
514 }
515 assert(fd[nr_cpu][counter] >= 0);
516 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
517
518 /*
519 * First counter acts as the group leader:
520 */
521 if (group && group_fd == -1)
522 group_fd = fd[nr_cpu][counter];
523
524 event_array[nr_poll].fd = fd[nr_cpu][counter];
525 event_array[nr_poll].events = POLLIN;
526 nr_poll++;
527
528 mmap_array[nr_cpu][counter].counter = counter;
529 mmap_array[nr_cpu][counter].prev = 0;
530 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
531 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
532 PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
533 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
534 printf("kerneltop error: failed to mmap with %d (%s)\n",
535 errno, strerror(errno));
536 exit(-1);
537 }
538 }
539 nr_cpu++;
540}
541
542int cmd_record(int argc, const char **argv)
543{
544 int i, counter;
de9ac07b
PZ
545 pid_t pid;
546 int ret;
547
548 page_size = sysconf(_SC_PAGE_SIZE);
549
550 process_options(argc, argv);
551
552 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
553 assert(nr_cpus <= MAX_NR_CPUS);
554 assert(nr_cpus >= 0);
555
556 output = open(output_name, O_CREAT|O_RDWR, S_IRWXU);
557 if (output < 0) {
558 perror("failed to create output file");
559 exit(-1);
560 }
561
562 argc -= optind;
563 argv += optind;
564
1a853e36
ACM
565 if (!system_wide) {
566 open_counters(-1, target_pid != -1 ? target_pid : 0);
567 } else for (i = 0; i < nr_cpus; i++)
568 open_counters(i, target_pid);
de9ac07b 569
16c8a109
PZ
570 signal(SIGCHLD, sig_handler);
571 signal(SIGINT, sig_handler);
de9ac07b 572
1a853e36
ACM
573 if (target_pid == -1) {
574 pid = fork();
575 if (pid < 0)
576 perror("failed to fork");
de9ac07b 577
1a853e36
ACM
578 if (!pid) {
579 if (execvp(argv[0], argv)) {
580 perror(argv[0]);
581 exit(-1);
582 }
de9ac07b
PZ
583 }
584 }
585
586 if (realtime_prio) {
587 struct sched_param param;
588
589 param.sched_priority = realtime_prio;
590 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
591 printf("Could not set realtime priority.\n");
592 exit(-1);
593 }
594 }
595
596 /*
597 * TODO: store the current /proc/$/maps information somewhere
598 */
599
600 while (!done) {
601 int hits = events;
602
16c8a109 603 for (i = 0; i < nr_cpu; i++) {
de9ac07b
PZ
604 for (counter = 0; counter < nr_counters; counter++)
605 mmap_read(&mmap_array[i][counter]);
606 }
607
608 if (hits == events)
609 ret = poll(event_array, nr_poll, 100);
610 }
611
612 return 0;
613}