arch/x86/kernel/cpu/perf_event.c
241771ef 1/*
cdd6c482 2 * Performance events x86 architecture code
241771ef 3 *
98144511
IM
4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2009 Jaswinder Singh Rajput
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
30dd568c 9 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
1da53e02 10 * Copyright (C) 2009 Google, Inc., Stephane Eranian
241771ef
IM
11 *
12 * For licencing details see kernel-base/COPYING
13 */
14
cdd6c482 15#include <linux/perf_event.h>
241771ef
IM
16#include <linux/capability.h>
17#include <linux/notifier.h>
18#include <linux/hardirq.h>
19#include <linux/kprobes.h>
4ac13294 20#include <linux/module.h>
241771ef
IM
21#include <linux/kdebug.h>
22#include <linux/sched.h>
d7d59fb3 23#include <linux/uaccess.h>
5a0e3ad6 24#include <linux/slab.h>
74193ef0 25#include <linux/highmem.h>
30dd568c 26#include <linux/cpu.h>
272d30be 27#include <linux/bitops.h>
241771ef 28
241771ef 29#include <asm/apic.h>
d7d59fb3 30#include <asm/stacktrace.h>
4e935e47 31#include <asm/nmi.h>
257ef9d2 32#include <asm/compat.h>
241771ef 33
7645a24c
PZ
34#if 0
35#undef wrmsrl
36#define wrmsrl(msr, val) \
37do { \
38 trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
39 (unsigned long)(val)); \
40 native_write_msr((msr), (u32)((u64)(val)), \
41 (u32)((u64)(val) >> 32)); \
42} while (0)
43#endif
44
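/*
 * Debug aid: changing the #if 0 above to #if 1 replaces every later
 * wrmsrl() in this translation unit (including the model-specific files
 * #included near the bottom) with a variant that logs the MSR and value
 * via trace_printk() before performing the write with native_write_msr().
 * Handy when chasing PMU programming bugs; far too noisy to leave enabled.
 */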
ef21f683
PZ
45/*
46 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
47 */
48static unsigned long
49copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
50{
51 unsigned long offset, addr = (unsigned long)from;
ef21f683
PZ
52 unsigned long size, len = 0;
53 struct page *page;
54 void *map;
55 int ret;
56
57 do {
58 ret = __get_user_pages_fast(addr, 1, 0, &page);
59 if (!ret)
60 break;
61
62 offset = addr & (PAGE_SIZE - 1);
63 size = min(PAGE_SIZE - offset, n - len);
64
7a837d1b 65 map = kmap_atomic(page);
ef21f683 66 memcpy(to, map+offset, size);
7a837d1b 67 kunmap_atomic(map);
ef21f683
PZ
68 put_page(page);
69
70 len += size;
71 to += size;
72 addr += size;
73
74 } while (len < n);
75
76 return len;
77}
78
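/*
 * Usage note (illustrative): callers such as the callchain walkers further
 * down treat a short return value as end-of-data, e.g.
 *
 *	if (copy_from_user_nmi(&frame, fp, sizeof(frame)) != sizeof(frame))
 *		break;
 *
 * Because __get_user_pages_fast() never sleeps, a fault on an unmapped
 * user page simply truncates the copy instead of blocking in NMI context.
 */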
1da53e02 79struct event_constraint {
c91e0f5d
PZ
80 union {
81 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
b622d644 82 u64 idxmsk64;
c91e0f5d 83 };
b622d644
PZ
84 u64 code;
85 u64 cmask;
272d30be 86 int weight;
1da53e02
SE
87};
88
38331f62
SE
89struct amd_nb {
90 int nb_id; /* NorthBridge id */
91 int refcnt; /* reference count */
92 struct perf_event *owners[X86_PMC_IDX_MAX];
93 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
94};
95
caff2bef
PZ
96#define MAX_LBR_ENTRIES 16
97
cdd6c482 98struct cpu_hw_events {
ca037701
PZ
99 /*
100 * Generic x86 PMC bits
101 */
1da53e02 102 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
43f6201a 103 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
63e6be6d 104 unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
b0f3f28e 105 int enabled;
241771ef 106
1da53e02
SE
107 int n_events;
108 int n_added;
90151c35 109 int n_txn;
1da53e02 110 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
447a194b 111 u64 tags[X86_PMC_IDX_MAX];
1da53e02 112 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
ca037701 113
4d1c52b0
LM
114 unsigned int group_flag;
115
ca037701
PZ
116 /*
117 * Intel DebugStore bits
118 */
119 struct debug_store *ds;
120 u64 pebs_enabled;
121
caff2bef
PZ
122 /*
123 * Intel LBR bits
124 */
125 int lbr_users;
126 void *lbr_context;
127 struct perf_branch_stack lbr_stack;
128 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
129
ca037701
PZ
130 /*
131 * AMD specific bits
132 */
38331f62 133 struct amd_nb *amd_nb;
b690081d
SE
134};
135
fce877e3 136#define __EVENT_CONSTRAINT(c, n, m, w) {\
b622d644 137 { .idxmsk64 = (n) }, \
c91e0f5d
PZ
138 .code = (c), \
139 .cmask = (m), \
fce877e3 140 .weight = (w), \
c91e0f5d 141}
b690081d 142
fce877e3
PZ
143#define EVENT_CONSTRAINT(c, n, m) \
144 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
145
ca037701
PZ
146/*
147 * Constraint on the Event code.
148 */
ed8777fc 149#define INTEL_EVENT_CONSTRAINT(c, n) \
a098f448 150 EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
8433be11 151
ca037701
PZ
152/*
153 * Constraint on the Event code + UMask + fixed-mask
a098f448
RR
154 *
 155 * Filter mask to validate fixed counter events.
 156 * The following filters disqualify an event from using a fixed counter:
157 * - inv
158 * - edge
159 * - cnt-mask
160 * The other filters are supported by fixed counters.
161 * The any-thread option is supported starting with v3.
ca037701 162 */
ed8777fc 163#define FIXED_EVENT_CONSTRAINT(c, n) \
a098f448 164 EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
8433be11 165
ca037701
PZ
166/*
167 * Constraint on the Event code + UMask
168 */
169#define PEBS_EVENT_CONSTRAINT(c, n) \
170 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
171
ed8777fc
PZ
172#define EVENT_CONSTRAINT_END \
173 EVENT_CONSTRAINT(0, 0, 0)
174
175#define for_each_event_constraint(e, c) \
a1f2b70a 176 for ((e) = (c); (e)->weight; (e)++)
b690081d 177
8db909a7
PZ
178union perf_capabilities {
179 struct {
180 u64 lbr_format : 6;
181 u64 pebs_trap : 1;
182 u64 pebs_arch_reg : 1;
183 u64 pebs_format : 4;
184 u64 smm_freeze : 1;
185 };
186 u64 capabilities;
187};
188
241771ef 189/*
5f4ec28f 190 * struct x86_pmu - generic x86 pmu
241771ef 191 */
5f4ec28f 192struct x86_pmu {
ca037701
PZ
193 /*
194 * Generic x86 PMC bits
195 */
faa28ae0
RR
196 const char *name;
197 int version;
a3288106 198 int (*handle_irq)(struct pt_regs *);
9e35ad38 199 void (*disable_all)(void);
11164cd4 200 void (*enable_all)(int added);
aff3d91a
PZ
201 void (*enable)(struct perf_event *);
202 void (*disable)(struct perf_event *);
b4cdc5c2 203 int (*hw_config)(struct perf_event *event);
a072738e 204 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
169e41eb
JSR
205 unsigned eventsel;
206 unsigned perfctr;
b0f3f28e 207 u64 (*event_map)(int);
169e41eb 208 int max_events;
948b1bb8
RR
209 int num_counters;
210 int num_counters_fixed;
211 int cntval_bits;
212 u64 cntval_mask;
04da8a43 213 int apic;
c619b8ff 214 u64 max_period;
63b14649
PZ
215 struct event_constraint *
216 (*get_event_constraints)(struct cpu_hw_events *cpuc,
217 struct perf_event *event);
218
c91e0f5d
PZ
219 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
220 struct perf_event *event);
63b14649 221 struct event_constraint *event_constraints;
3c44780b 222 void (*quirks)(void);
68aa00ac 223 int perfctr_second_write;
3f6da390 224
b38b24ea 225 int (*cpu_prepare)(int cpu);
3f6da390
PZ
226 void (*cpu_starting)(int cpu);
227 void (*cpu_dying)(int cpu);
228 void (*cpu_dead)(int cpu);
ca037701
PZ
229
230 /*
231 * Intel Arch Perfmon v2+
232 */
8db909a7
PZ
233 u64 intel_ctrl;
234 union perf_capabilities intel_cap;
ca037701
PZ
235
236 /*
237 * Intel DebugStore bits
238 */
239 int bts, pebs;
6809b6ea 240 int bts_active, pebs_active;
ca037701
PZ
241 int pebs_record_size;
242 void (*drain_pebs)(struct pt_regs *regs);
243 struct event_constraint *pebs_constraints;
caff2bef
PZ
244
245 /*
246 * Intel LBR
247 */
248 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
249 int lbr_nr; /* hardware stack size */
b56a3802
JSR
250};
251
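/*
 * x86_pmu below is the single, boot-time-selected backend: intel_pmu_init(),
 * amd_pmu_init() etc. (from the files #included near the bottom) fill it in
 * with a model-specific template, and the generic code in this file only
 * ever calls through these ops.  Ops left NULL (e.g. ->cpu_starting) are
 * simply skipped by their callers.
 */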
4a06bd85 252static struct x86_pmu x86_pmu __read_mostly;
b56a3802 253
cdd6c482 254static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
b0f3f28e
PZ
255 .enabled = 1,
256};
241771ef 257
07088edb 258static int x86_perf_event_set_period(struct perf_event *event);
b690081d 259
8326f44d 260/*
dfc65094 261 * Generalized hw caching related hw_event table, filled
8326f44d 262 * in on a per model basis. A value of 0 means
dfc65094
IM
263 * 'not supported', -1 means 'hw_event makes no sense on
264 * this CPU', any other value means the raw hw_event
8326f44d
IM
265 * ID.
266 */
267
268#define C(x) PERF_COUNT_HW_CACHE_##x
269
270static u64 __read_mostly hw_cache_event_ids
271 [PERF_COUNT_HW_CACHE_MAX]
272 [PERF_COUNT_HW_CACHE_OP_MAX]
273 [PERF_COUNT_HW_CACHE_RESULT_MAX];
274
ee06094f 275/*
cdd6c482
IM
276 * Propagate event elapsed time into the generic event.
277 * Can only be executed on the CPU where the event is active.
ee06094f
IM
278 * Returns the delta events processed.
279 */
4b7bfd0d 280static u64
cc2ad4ba 281x86_perf_event_update(struct perf_event *event)
ee06094f 282{
cc2ad4ba 283 struct hw_perf_event *hwc = &event->hw;
948b1bb8 284 int shift = 64 - x86_pmu.cntval_bits;
ec3232bd 285 u64 prev_raw_count, new_raw_count;
cc2ad4ba 286 int idx = hwc->idx;
ec3232bd 287 s64 delta;
ee06094f 288
30dd568c
MM
289 if (idx == X86_PMC_IDX_FIXED_BTS)
290 return 0;
291
ee06094f 292 /*
cdd6c482 293 * Careful: an NMI might modify the previous event value.
ee06094f
IM
294 *
295 * Our tactic to handle this is to first atomically read and
296 * exchange a new raw count - then add that new-prev delta
cdd6c482 297 * count to the generic event atomically:
ee06094f
IM
298 */
299again:
e7850595 300 prev_raw_count = local64_read(&hwc->prev_count);
cdd6c482 301 rdmsrl(hwc->event_base + idx, new_raw_count);
ee06094f 302
e7850595 303 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
ee06094f
IM
304 new_raw_count) != prev_raw_count)
305 goto again;
306
307 /*
308 * Now we have the new raw value and have updated the prev
309 * timestamp already. We can now calculate the elapsed delta
cdd6c482 310 * (event-)time and add that to the generic event.
ee06094f
IM
311 *
312 * Careful, not all hw sign-extends above the physical width
ec3232bd 313 * of the count.
ee06094f 314 */
ec3232bd
PZ
315 delta = (new_raw_count << shift) - (prev_raw_count << shift);
316 delta >>= shift;
ee06094f 317
e7850595
PZ
318 local64_add(delta, &event->count);
319 local64_sub(delta, &hwc->period_left);
4b7bfd0d
RR
320
321 return new_raw_count;
ee06094f
IM
322}
323
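/*
 * Worked example of the shift trick above, assuming cntval_bits = 48
 * (so shift = 16): prev_raw = 0x0000ffffffffff00 and
 * new_raw = 0x0000000000000100 after the 48-bit counter wrapped.
 * (new << 16) - (prev << 16) is computed in 64 bits and the arithmetic
 * >> 16 yields delta = 0x200 = 512, the correct distance modulo 2^48,
 * regardless of whether the hardware sign-extends bits 48-63 on rdmsr.
 */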
cdd6c482 324static atomic_t active_events;
4e935e47
PZ
325static DEFINE_MUTEX(pmc_reserve_mutex);
326
b27ea29c
RR
327#ifdef CONFIG_X86_LOCAL_APIC
328
4e935e47
PZ
329static bool reserve_pmc_hardware(void)
330{
331 int i;
332
948b1bb8 333 for (i = 0; i < x86_pmu.num_counters; i++) {
4a06bd85 334 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
4e935e47
PZ
335 goto perfctr_fail;
336 }
337
948b1bb8 338 for (i = 0; i < x86_pmu.num_counters; i++) {
4a06bd85 339 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
4e935e47
PZ
340 goto eventsel_fail;
341 }
342
343 return true;
344
345eventsel_fail:
346 for (i--; i >= 0; i--)
4a06bd85 347 release_evntsel_nmi(x86_pmu.eventsel + i);
4e935e47 348
948b1bb8 349 i = x86_pmu.num_counters;
4e935e47
PZ
350
351perfctr_fail:
352 for (i--; i >= 0; i--)
4a06bd85 353 release_perfctr_nmi(x86_pmu.perfctr + i);
4e935e47 354
4e935e47
PZ
355 return false;
356}
357
358static void release_pmc_hardware(void)
359{
360 int i;
361
948b1bb8 362 for (i = 0; i < x86_pmu.num_counters; i++) {
4a06bd85
RR
363 release_perfctr_nmi(x86_pmu.perfctr + i);
364 release_evntsel_nmi(x86_pmu.eventsel + i);
4e935e47 365 }
4e935e47
PZ
366}
367
b27ea29c
RR
368#else
369
370static bool reserve_pmc_hardware(void) { return true; }
371static void release_pmc_hardware(void) {}
372
373#endif
374
33c6d6a7
DZ
375static bool check_hw_exists(void)
376{
377 u64 val, val_new = 0;
378 int ret = 0;
379
380 val = 0xabcdUL;
381 ret |= checking_wrmsrl(x86_pmu.perfctr, val);
382 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
383 if (ret || val != val_new)
384 return false;
385
386 return true;
387}
388
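/*
 * check_hw_exists() above writes a known value to the first counter MSR and
 * reads it back; if either access faults or the value does not stick
 * (typical of hypervisors that advertise the CPUID bits without emulating
 * the MSRs), the PMU is treated as broken and we fall back to software
 * events at init time.
 */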
f80c9e30 389static void reserve_ds_buffers(void);
ca037701 390static void release_ds_buffers(void);
30dd568c 391
cdd6c482 392static void hw_perf_event_destroy(struct perf_event *event)
4e935e47 393{
cdd6c482 394 if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
4e935e47 395 release_pmc_hardware();
ca037701 396 release_ds_buffers();
4e935e47
PZ
397 mutex_unlock(&pmc_reserve_mutex);
398 }
399}
400
85cf9dba
RR
401static inline int x86_pmu_initialized(void)
402{
403 return x86_pmu.handle_irq != NULL;
404}
405
8326f44d 406static inline int
cdd6c482 407set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
8326f44d
IM
408{
409 unsigned int cache_type, cache_op, cache_result;
410 u64 config, val;
411
412 config = attr->config;
413
414 cache_type = (config >> 0) & 0xff;
415 if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
416 return -EINVAL;
417
418 cache_op = (config >> 8) & 0xff;
419 if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
420 return -EINVAL;
421
422 cache_result = (config >> 16) & 0xff;
423 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
424 return -EINVAL;
425
426 val = hw_cache_event_ids[cache_type][cache_op][cache_result];
427
428 if (val == 0)
429 return -ENOENT;
430
431 if (val == -1)
432 return -EINVAL;
433
434 hwc->config |= val;
435
436 return 0;
437}
438
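/*
 * Layout of attr->config as parsed above: bits 0-7 cache type, bits 8-15
 * operation, bits 16-23 result.  A minimal sketch of building such a value
 * (kept out of the build; the helper name is made up):
 */
#if 0
static u64 make_hw_cache_config(u8 type, u8 op, u8 result)
{
	return (u64)type | ((u64)op << 8) | ((u64)result << 16);
}
/* e.g. L1D read misses: make_hw_cache_config(C(L1D), C(OP_READ), C(RESULT_MISS)) */
#endif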
c1726f34
RR
439static int x86_setup_perfctr(struct perf_event *event)
440{
441 struct perf_event_attr *attr = &event->attr;
442 struct hw_perf_event *hwc = &event->hw;
443 u64 config;
444
6c7e550f 445 if (!is_sampling_event(event)) {
c1726f34
RR
446 hwc->sample_period = x86_pmu.max_period;
447 hwc->last_period = hwc->sample_period;
e7850595 448 local64_set(&hwc->period_left, hwc->sample_period);
c1726f34
RR
449 } else {
450 /*
451 * If we have a PMU initialized but no APIC
452 * interrupts, we cannot sample hardware
453 * events (user-space has to fall back and
454 * sample via a hrtimer based software event):
455 */
456 if (!x86_pmu.apic)
457 return -EOPNOTSUPP;
458 }
459
460 if (attr->type == PERF_TYPE_RAW)
461 return 0;
462
463 if (attr->type == PERF_TYPE_HW_CACHE)
464 return set_ext_hw_attr(hwc, attr);
465
466 if (attr->config >= x86_pmu.max_events)
467 return -EINVAL;
468
469 /*
470 * The generic map:
471 */
472 config = x86_pmu.event_map(attr->config);
473
474 if (config == 0)
475 return -ENOENT;
476
477 if (config == -1LL)
478 return -EINVAL;
479
480 /*
481 * Branch tracing:
482 */
483 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
484 (hwc->sample_period == 1)) {
485 /* BTS is not supported by this architecture. */
6809b6ea 486 if (!x86_pmu.bts_active)
c1726f34
RR
487 return -EOPNOTSUPP;
488
489 /* BTS is currently only allowed for user-mode. */
490 if (!attr->exclude_kernel)
491 return -EOPNOTSUPP;
492 }
493
494 hwc->config |= config;
495
496 return 0;
497}
4261e0e0 498
b4cdc5c2 499static int x86_pmu_hw_config(struct perf_event *event)
a072738e 500{
ab608344
PZ
501 if (event->attr.precise_ip) {
502 int precise = 0;
503
504 /* Support for constant skid */
6809b6ea 505 if (x86_pmu.pebs_active) {
ab608344
PZ
506 precise++;
507
5553be26
PZ
508 /* Support for IP fixup */
509 if (x86_pmu.lbr_nr)
510 precise++;
511 }
ab608344
PZ
512
513 if (event->attr.precise_ip > precise)
514 return -EOPNOTSUPP;
515 }
516
a072738e
CG
517 /*
518 * Generate PMC IRQs:
519 * (keep 'enabled' bit clear for now)
520 */
b4cdc5c2 521 event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
a072738e
CG
522
523 /*
524 * Count user and OS events unless requested not to
525 */
b4cdc5c2
PZ
526 if (!event->attr.exclude_user)
527 event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
528 if (!event->attr.exclude_kernel)
529 event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
a072738e 530
b4cdc5c2
PZ
531 if (event->attr.type == PERF_TYPE_RAW)
532 event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
a072738e 533
9d0fcba6 534 return x86_setup_perfctr(event);
a098f448
RR
535}
536
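/*
 * Summary of the precise_ip check above: attr.precise_ip == 1 requires PEBS
 * ("constant skid"), attr.precise_ip == 2 additionally requires an LBR stack
 * so the exact IP can be fixed up; anything the hardware cannot provide is
 * rejected with -EOPNOTSUPP rather than silently degraded.
 */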
241771ef 537/*
0d48696f 538 * Setup the hardware configuration for a given attr_type
241771ef 539 */
b0a873eb 540static int __x86_pmu_event_init(struct perf_event *event)
241771ef 541{
4e935e47 542 int err;
241771ef 543
85cf9dba
RR
544 if (!x86_pmu_initialized())
545 return -ENODEV;
241771ef 546
4e935e47 547 err = 0;
cdd6c482 548 if (!atomic_inc_not_zero(&active_events)) {
4e935e47 549 mutex_lock(&pmc_reserve_mutex);
cdd6c482 550 if (atomic_read(&active_events) == 0) {
30dd568c
MM
551 if (!reserve_pmc_hardware())
552 err = -EBUSY;
f80c9e30
PZ
553 else
554 reserve_ds_buffers();
30dd568c
MM
555 }
556 if (!err)
cdd6c482 557 atomic_inc(&active_events);
4e935e47
PZ
558 mutex_unlock(&pmc_reserve_mutex);
559 }
560 if (err)
561 return err;
562
cdd6c482 563 event->destroy = hw_perf_event_destroy;
a1792cda 564
4261e0e0
RR
565 event->hw.idx = -1;
566 event->hw.last_cpu = -1;
567 event->hw.last_tag = ~0ULL;
b690081d 568
9d0fcba6 569 return x86_pmu.hw_config(event);
4261e0e0
RR
570}
571
8c48e444 572static void x86_pmu_disable_all(void)
f87ad35d 573{
cdd6c482 574 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
9e35ad38
PZ
575 int idx;
576
948b1bb8 577 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
b0f3f28e
PZ
578 u64 val;
579
43f6201a 580 if (!test_bit(idx, cpuc->active_mask))
4295ee62 581 continue;
8c48e444 582 rdmsrl(x86_pmu.eventsel + idx, val);
bb1165d6 583 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
4295ee62 584 continue;
bb1165d6 585 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
8c48e444 586 wrmsrl(x86_pmu.eventsel + idx, val);
f87ad35d 587 }
f87ad35d
JSR
588}
589
a4eaf7f1 590static void x86_pmu_disable(struct pmu *pmu)
b56a3802 591{
1da53e02
SE
592 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
593
85cf9dba 594 if (!x86_pmu_initialized())
9e35ad38 595 return;
1da53e02 596
1a6e21f7
PZ
597 if (!cpuc->enabled)
598 return;
599
600 cpuc->n_added = 0;
601 cpuc->enabled = 0;
602 barrier();
1da53e02
SE
603
604 x86_pmu.disable_all();
b56a3802 605}
241771ef 606
11164cd4 607static void x86_pmu_enable_all(int added)
f87ad35d 608{
cdd6c482 609 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
f87ad35d
JSR
610 int idx;
611
948b1bb8 612 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
cdd6c482 613 struct perf_event *event = cpuc->events[idx];
4295ee62 614 u64 val;
b0f3f28e 615
43f6201a 616 if (!test_bit(idx, cpuc->active_mask))
4295ee62 617 continue;
984b838c 618
cdd6c482 619 val = event->hw.config;
bb1165d6 620 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
8c48e444 621 wrmsrl(x86_pmu.eventsel + idx, val);
f87ad35d
JSR
622 }
623}
624
51b0fe39 625static struct pmu pmu;
1da53e02
SE
626
627static inline int is_x86_event(struct perf_event *event)
628{
629 return event->pmu == &pmu;
630}
631
632static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
633{
63b14649 634 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
1da53e02 635 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
c933c1a6 636 int i, j, w, wmax, num = 0;
1da53e02
SE
637 struct hw_perf_event *hwc;
638
639 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
640
641 for (i = 0; i < n; i++) {
b622d644
PZ
642 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
643 constraints[i] = c;
1da53e02
SE
644 }
645
8113070d
SE
646 /*
647 * fastpath, try to reuse previous register
648 */
c933c1a6 649 for (i = 0; i < n; i++) {
8113070d 650 hwc = &cpuc->event_list[i]->hw;
81269a08 651 c = constraints[i];
8113070d
SE
652
653 /* never assigned */
654 if (hwc->idx == -1)
655 break;
656
657 /* constraint still honored */
63b14649 658 if (!test_bit(hwc->idx, c->idxmsk))
8113070d
SE
659 break;
660
661 /* not already used */
662 if (test_bit(hwc->idx, used_mask))
663 break;
664
34538ee7 665 __set_bit(hwc->idx, used_mask);
8113070d
SE
666 if (assign)
667 assign[i] = hwc->idx;
668 }
c933c1a6 669 if (i == n)
8113070d
SE
670 goto done;
671
672 /*
673 * begin slow path
674 */
675
676 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
677
1da53e02
SE
678 /*
679 * weight = number of possible counters
680 *
681 * 1 = most constrained, only works on one counter
682 * wmax = least constrained, works on any counter
683 *
684 * assign events to counters starting with most
685 * constrained events.
686 */
948b1bb8 687 wmax = x86_pmu.num_counters;
1da53e02
SE
688
689 /*
690 * when fixed event counters are present,
691 * wmax is incremented by 1 to account
692 * for one more choice
693 */
948b1bb8 694 if (x86_pmu.num_counters_fixed)
1da53e02
SE
695 wmax++;
696
8113070d 697 for (w = 1, num = n; num && w <= wmax; w++) {
1da53e02 698 /* for each event */
8113070d 699 for (i = 0; num && i < n; i++) {
81269a08 700 c = constraints[i];
1da53e02
SE
701 hwc = &cpuc->event_list[i]->hw;
702
272d30be 703 if (c->weight != w)
1da53e02
SE
704 continue;
705
984b3f57 706 for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
1da53e02
SE
707 if (!test_bit(j, used_mask))
708 break;
709 }
710
711 if (j == X86_PMC_IDX_MAX)
712 break;
1da53e02 713
34538ee7 714 __set_bit(j, used_mask);
8113070d 715
1da53e02
SE
716 if (assign)
717 assign[i] = j;
718 num--;
719 }
720 }
8113070d 721done:
1da53e02
SE
722 /*
723 * scheduling failed or is just a simulation,
724 * free resources if necessary
725 */
726 if (!assign || num) {
727 for (i = 0; i < n; i++) {
728 if (x86_pmu.put_event_constraints)
729 x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
730 }
731 }
732 return num ? -ENOSPC : 0;
733}
734
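/*
 * Scheduling sketch: the fastpath keeps every event on the counter it
 * already had, provided that counter is still legal and unclaimed.  The
 * slowpath redoes the assignment from scratch, placing events in order of
 * increasing weight -- e.g. with four generic counters and constraint
 * weights {1, 4, 4}, the weight-1 event grabs its only legal counter first
 * and the unconstrained events fill whatever is left (illustrative numbers,
 * not from any particular CPU model).
 */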
735/*
 736 * dogrp: true if we must collect sibling events (group)
737 * returns total number of events and error code
738 */
739static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
740{
741 struct perf_event *event;
742 int n, max_count;
743
948b1bb8 744 max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
1da53e02
SE
745
746 /* current number of events already accepted */
747 n = cpuc->n_events;
748
749 if (is_x86_event(leader)) {
750 if (n >= max_count)
751 return -ENOSPC;
752 cpuc->event_list[n] = leader;
753 n++;
754 }
755 if (!dogrp)
756 return n;
757
758 list_for_each_entry(event, &leader->sibling_list, group_entry) {
759 if (!is_x86_event(event) ||
8113070d 760 event->state <= PERF_EVENT_STATE_OFF)
1da53e02
SE
761 continue;
762
763 if (n >= max_count)
764 return -ENOSPC;
765
766 cpuc->event_list[n] = event;
767 n++;
768 }
769 return n;
770}
771
1da53e02 772static inline void x86_assign_hw_event(struct perf_event *event,
447a194b 773 struct cpu_hw_events *cpuc, int i)
1da53e02 774{
447a194b
SE
775 struct hw_perf_event *hwc = &event->hw;
776
777 hwc->idx = cpuc->assign[i];
778 hwc->last_cpu = smp_processor_id();
779 hwc->last_tag = ++cpuc->tags[i];
1da53e02
SE
780
781 if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
782 hwc->config_base = 0;
783 hwc->event_base = 0;
784 } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
785 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
786 /*
787 * We set it so that event_base + idx in wrmsr/rdmsr maps to
788 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
789 */
790 hwc->event_base =
791 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
792 } else {
793 hwc->config_base = x86_pmu.eventsel;
794 hwc->event_base = x86_pmu.perfctr;
795 }
796}
797
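/*
 * Note on the fixed-counter case above: fixed counters occupy indices
 * X86_PMC_IDX_FIXED + n, so biasing event_base by -X86_PMC_IDX_FIXED makes
 * the generic "event_base + idx" rdmsr/wrmsr expressions land on
 * MSR_ARCH_PERFMON_FIXED_CTR0 + n without special-casing elsewhere.
 */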
447a194b
SE
798static inline int match_prev_assignment(struct hw_perf_event *hwc,
799 struct cpu_hw_events *cpuc,
800 int i)
801{
802 return hwc->idx == cpuc->assign[i] &&
803 hwc->last_cpu == smp_processor_id() &&
804 hwc->last_tag == cpuc->tags[i];
805}
806
a4eaf7f1
PZ
807static void x86_pmu_start(struct perf_event *event, int flags);
808static void x86_pmu_stop(struct perf_event *event, int flags);
2e841873 809
a4eaf7f1 810static void x86_pmu_enable(struct pmu *pmu)
ee06094f 811{
1da53e02
SE
812 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
813 struct perf_event *event;
814 struct hw_perf_event *hwc;
11164cd4 815 int i, added = cpuc->n_added;
1da53e02 816
85cf9dba 817 if (!x86_pmu_initialized())
2b9ff0db 818 return;
1a6e21f7
PZ
819
820 if (cpuc->enabled)
821 return;
822
1da53e02 823 if (cpuc->n_added) {
19925ce7 824 int n_running = cpuc->n_events - cpuc->n_added;
1da53e02
SE
825 /*
826 * apply assignment obtained either from
827 * hw_perf_group_sched_in() or x86_pmu_enable()
828 *
829 * step1: save events moving to new counters
830 * step2: reprogram moved events into new counters
831 */
19925ce7 832 for (i = 0; i < n_running; i++) {
1da53e02
SE
833 event = cpuc->event_list[i];
834 hwc = &event->hw;
835
447a194b
SE
836 /*
837 * we can avoid reprogramming counter if:
838 * - assigned same counter as last time
839 * - running on same CPU as last time
840 * - no other event has used the counter since
841 */
842 if (hwc->idx == -1 ||
843 match_prev_assignment(hwc, cpuc, i))
1da53e02
SE
844 continue;
845
a4eaf7f1
PZ
846 /*
847 * Ensure we don't accidentally enable a stopped
848 * counter simply because we rescheduled.
849 */
850 if (hwc->state & PERF_HES_STOPPED)
851 hwc->state |= PERF_HES_ARCH;
852
853 x86_pmu_stop(event, PERF_EF_UPDATE);
1da53e02
SE
854 }
855
856 for (i = 0; i < cpuc->n_events; i++) {
1da53e02
SE
857 event = cpuc->event_list[i];
858 hwc = &event->hw;
859
45e16a68 860 if (!match_prev_assignment(hwc, cpuc, i))
447a194b 861 x86_assign_hw_event(event, cpuc, i);
45e16a68
PZ
862 else if (i < n_running)
863 continue;
1da53e02 864
a4eaf7f1
PZ
865 if (hwc->state & PERF_HES_ARCH)
866 continue;
867
868 x86_pmu_start(event, PERF_EF_RELOAD);
1da53e02
SE
869 }
870 cpuc->n_added = 0;
871 perf_events_lapic_init();
872 }
1a6e21f7
PZ
873
874 cpuc->enabled = 1;
875 barrier();
876
11164cd4 877 x86_pmu.enable_all(added);
ee06094f 878}
ee06094f 879
31fa58af
RR
880static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
881 u64 enable_mask)
b0f3f28e 882{
31fa58af 883 wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
b0f3f28e
PZ
884}
885
aff3d91a 886static inline void x86_pmu_disable_event(struct perf_event *event)
b0f3f28e 887{
aff3d91a 888 struct hw_perf_event *hwc = &event->hw;
7645a24c
PZ
889
890 wrmsrl(hwc->config_base + hwc->idx, hwc->config);
b0f3f28e
PZ
891}
892
245b2e70 893static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
241771ef 894
ee06094f
IM
895/*
896 * Set the next IRQ period, based on the hwc->period_left value.
cdd6c482 897 * To be called with the event disabled in hw:
ee06094f 898 */
e4abb5d4 899static int
07088edb 900x86_perf_event_set_period(struct perf_event *event)
241771ef 901{
07088edb 902 struct hw_perf_event *hwc = &event->hw;
e7850595 903 s64 left = local64_read(&hwc->period_left);
e4abb5d4 904 s64 period = hwc->sample_period;
7645a24c 905 int ret = 0, idx = hwc->idx;
ee06094f 906
30dd568c
MM
907 if (idx == X86_PMC_IDX_FIXED_BTS)
908 return 0;
909
ee06094f 910 /*
af901ca1 911 * If we are way outside a reasonable range then just skip forward:
ee06094f
IM
912 */
913 if (unlikely(left <= -period)) {
914 left = period;
e7850595 915 local64_set(&hwc->period_left, left);
9e350de3 916 hwc->last_period = period;
e4abb5d4 917 ret = 1;
ee06094f
IM
918 }
919
920 if (unlikely(left <= 0)) {
921 left += period;
e7850595 922 local64_set(&hwc->period_left, left);
9e350de3 923 hwc->last_period = period;
e4abb5d4 924 ret = 1;
ee06094f 925 }
1c80f4b5 926 /*
dfc65094 927 * Quirk: certain CPUs don't like it if just 1 hw_event is left:
1c80f4b5
IM
928 */
929 if (unlikely(left < 2))
930 left = 2;
241771ef 931
e4abb5d4
PZ
932 if (left > x86_pmu.max_period)
933 left = x86_pmu.max_period;
934
245b2e70 935 per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
ee06094f
IM
936
937 /*
cdd6c482 938 * The hw event starts counting from this event offset,
ee06094f
IM
 939 * mark it to be able to extract future deltas:
940 */
e7850595 941 local64_set(&hwc->prev_count, (u64)-left);
ee06094f 942
68aa00ac
CG
943 wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
944
945 /*
 946 * Due to an erratum on certain CPUs we need
947 * a second write to be sure the register
948 * is updated properly
949 */
950 if (x86_pmu.perfctr_second_write) {
951 wrmsrl(hwc->event_base + idx,
948b1bb8 952 (u64)(-left) & x86_pmu.cntval_mask);
68aa00ac 953 }
e4abb5d4 954
cdd6c482 955 perf_event_update_userpage(event);
194002b2 956
e4abb5d4 957 return ret;
2f18d1e8
IM
958}
959
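/*
 * Worked example for x86_perf_event_set_period(): with sample_period =
 * 100000 and cntval_bits = 48 (assumed), the counter is programmed to
 * (u64)(-100000) & ((1ULL << 48) - 1) = 0xfffffffe7960, so it overflows and
 * raises the PMI after roughly 100000 increments; prev_count is set to the
 * same -left value so the next x86_perf_event_update() recovers a delta of
 * about one full period.
 */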
aff3d91a 960static void x86_pmu_enable_event(struct perf_event *event)
7c90cc45 961{
cdd6c482 962 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
7c90cc45 963 if (cpuc->enabled)
31fa58af
RR
964 __x86_pmu_enable_event(&event->hw,
965 ARCH_PERFMON_EVENTSEL_ENABLE);
241771ef
IM
966}
967
b690081d 968/*
a4eaf7f1 969 * Add a single event to the PMU.
1da53e02
SE
970 *
971 * The event is added to the group of enabled events
 972 * but only if it can be scheduled with existing events.
fe9081cc 973 */
a4eaf7f1 974static int x86_pmu_add(struct perf_event *event, int flags)
fe9081cc
PZ
975{
976 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1da53e02
SE
977 struct hw_perf_event *hwc;
978 int assign[X86_PMC_IDX_MAX];
979 int n, n0, ret;
fe9081cc 980
1da53e02 981 hwc = &event->hw;
fe9081cc 982
33696fc0 983 perf_pmu_disable(event->pmu);
1da53e02 984 n0 = cpuc->n_events;
24cd7f54
PZ
985 ret = n = collect_events(cpuc, event, false);
986 if (ret < 0)
987 goto out;
53b441a5 988
a4eaf7f1
PZ
989 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
990 if (!(flags & PERF_EF_START))
991 hwc->state |= PERF_HES_ARCH;
992
4d1c52b0
LM
993 /*
994 * If group events scheduling transaction was started,
 995 * skip the schedulability test here, it will be performed
a4eaf7f1 996 * at commit time (->commit_txn) as a whole
4d1c52b0 997 */
8d2cacbb 998 if (cpuc->group_flag & PERF_EVENT_TXN)
24cd7f54 999 goto done_collect;
4d1c52b0 1000
a072738e 1001 ret = x86_pmu.schedule_events(cpuc, n, assign);
1da53e02 1002 if (ret)
24cd7f54 1003 goto out;
1da53e02
SE
1004 /*
 1005 * copy the new assignment now that we know it is possible;
 1006 * it will be used by hw_perf_enable()
1007 */
1008 memcpy(cpuc->assign, assign, n*sizeof(int));
7e2ae347 1009
24cd7f54 1010done_collect:
1da53e02 1011 cpuc->n_events = n;
356e1f2e 1012 cpuc->n_added += n - n0;
90151c35 1013 cpuc->n_txn += n - n0;
95cdd2e7 1014
24cd7f54
PZ
1015 ret = 0;
1016out:
33696fc0 1017 perf_pmu_enable(event->pmu);
24cd7f54 1018 return ret;
241771ef
IM
1019}
1020
a4eaf7f1 1021static void x86_pmu_start(struct perf_event *event, int flags)
d76a0812 1022{
c08053e6
PZ
1023 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1024 int idx = event->hw.idx;
1025
a4eaf7f1
PZ
1026 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1027 return;
1028
1029 if (WARN_ON_ONCE(idx == -1))
1030 return;
1031
1032 if (flags & PERF_EF_RELOAD) {
1033 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
1034 x86_perf_event_set_period(event);
1035 }
1036
1037 event->hw.state = 0;
d76a0812 1038
c08053e6
PZ
1039 cpuc->events[idx] = event;
1040 __set_bit(idx, cpuc->active_mask);
63e6be6d 1041 __set_bit(idx, cpuc->running);
aff3d91a 1042 x86_pmu.enable(event);
c08053e6 1043 perf_event_update_userpage(event);
a78ac325
PZ
1044}
1045
cdd6c482 1046void perf_event_print_debug(void)
241771ef 1047{
2f18d1e8 1048 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
ca037701 1049 u64 pebs;
cdd6c482 1050 struct cpu_hw_events *cpuc;
5bb9efe3 1051 unsigned long flags;
1e125676
IM
1052 int cpu, idx;
1053
948b1bb8 1054 if (!x86_pmu.num_counters)
1e125676 1055 return;
241771ef 1056
5bb9efe3 1057 local_irq_save(flags);
241771ef
IM
1058
1059 cpu = smp_processor_id();
cdd6c482 1060 cpuc = &per_cpu(cpu_hw_events, cpu);
241771ef 1061
faa28ae0 1062 if (x86_pmu.version >= 2) {
a1ef58f4
JSR
1063 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
1064 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1065 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
1066 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
ca037701 1067 rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
a1ef58f4
JSR
1068
1069 pr_info("\n");
1070 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
1071 pr_info("CPU#%d: status: %016llx\n", cpu, status);
1072 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
1073 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
ca037701 1074 pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
f87ad35d 1075 }
7645a24c 1076 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
241771ef 1077
948b1bb8 1078 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
4a06bd85
RR
1079 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1080 rdmsrl(x86_pmu.perfctr + idx, pmc_count);
241771ef 1081
245b2e70 1082 prev_left = per_cpu(pmc_prev_left[idx], cpu);
241771ef 1083
a1ef58f4 1084 pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
241771ef 1085 cpu, idx, pmc_ctrl);
a1ef58f4 1086 pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
241771ef 1087 cpu, idx, pmc_count);
a1ef58f4 1088 pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
ee06094f 1089 cpu, idx, prev_left);
241771ef 1090 }
948b1bb8 1091 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
2f18d1e8
IM
1092 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
1093
a1ef58f4 1094 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
2f18d1e8
IM
1095 cpu, idx, pmc_count);
1096 }
5bb9efe3 1097 local_irq_restore(flags);
241771ef
IM
1098}
1099
a4eaf7f1 1100static void x86_pmu_stop(struct perf_event *event, int flags)
241771ef 1101{
d76a0812 1102 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
cdd6c482 1103 struct hw_perf_event *hwc = &event->hw;
241771ef 1104
a4eaf7f1
PZ
1105 if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
1106 x86_pmu.disable(event);
1107 cpuc->events[hwc->idx] = NULL;
1108 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
1109 hwc->state |= PERF_HES_STOPPED;
1110 }
30dd568c 1111
a4eaf7f1
PZ
1112 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1113 /*
 1114 * Drain the remaining delta count out of an event
1115 * that we are disabling:
1116 */
1117 x86_perf_event_update(event);
1118 hwc->state |= PERF_HES_UPTODATE;
1119 }
2e841873
PZ
1120}
1121
a4eaf7f1 1122static void x86_pmu_del(struct perf_event *event, int flags)
2e841873
PZ
1123{
1124 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1125 int i;
1126
90151c35
SE
1127 /*
1128 * If we're called during a txn, we don't need to do anything.
1129 * The events never got scheduled and ->cancel_txn will truncate
1130 * the event_list.
1131 */
8d2cacbb 1132 if (cpuc->group_flag & PERF_EVENT_TXN)
90151c35
SE
1133 return;
1134
a4eaf7f1 1135 x86_pmu_stop(event, PERF_EF_UPDATE);
194002b2 1136
1da53e02
SE
1137 for (i = 0; i < cpuc->n_events; i++) {
1138 if (event == cpuc->event_list[i]) {
1139
1140 if (x86_pmu.put_event_constraints)
1141 x86_pmu.put_event_constraints(cpuc, event);
1142
1143 while (++i < cpuc->n_events)
1144 cpuc->event_list[i-1] = cpuc->event_list[i];
1145
1146 --cpuc->n_events;
6c9687ab 1147 break;
1da53e02
SE
1148 }
1149 }
cdd6c482 1150 perf_event_update_userpage(event);
241771ef
IM
1151}
1152
8c48e444 1153static int x86_pmu_handle_irq(struct pt_regs *regs)
a29aa8a7 1154{
df1a132b 1155 struct perf_sample_data data;
cdd6c482
IM
1156 struct cpu_hw_events *cpuc;
1157 struct perf_event *event;
11d1578f 1158 int idx, handled = 0;
9029a5e3
IM
1159 u64 val;
1160
dc1d628a 1161 perf_sample_data_init(&data, 0);
df1a132b 1162
cdd6c482 1163 cpuc = &__get_cpu_var(cpu_hw_events);
962bf7a6 1164
948b1bb8 1165 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
63e6be6d
RR
1166 if (!test_bit(idx, cpuc->active_mask)) {
1167 /*
 1168 * Though we deactivated the counter, some CPUs
 1169 * might still deliver spurious interrupts that are
 1170 * still in flight. Catch them:
1171 */
1172 if (__test_and_clear_bit(idx, cpuc->running))
1173 handled++;
a29aa8a7 1174 continue;
63e6be6d 1175 }
962bf7a6 1176
cdd6c482 1177 event = cpuc->events[idx];
a4016a79 1178
cc2ad4ba 1179 val = x86_perf_event_update(event);
948b1bb8 1180 if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
48e22d56 1181 continue;
962bf7a6 1182
9e350de3 1183 /*
cdd6c482 1184 * event overflow
9e350de3 1185 */
4177c42a 1186 handled++;
cdd6c482 1187 data.period = event->hw.last_period;
9e350de3 1188
07088edb 1189 if (!x86_perf_event_set_period(event))
e4abb5d4
PZ
1190 continue;
1191
cdd6c482 1192 if (perf_event_overflow(event, 1, &data, regs))
a4eaf7f1 1193 x86_pmu_stop(event, 0);
a29aa8a7 1194 }
962bf7a6 1195
9e350de3
PZ
1196 if (handled)
1197 inc_irq_stat(apic_perf_irqs);
1198
a29aa8a7
RR
1199 return handled;
1200}
39d81eab 1201
cdd6c482 1202void perf_events_lapic_init(void)
241771ef 1203{
04da8a43 1204 if (!x86_pmu.apic || !x86_pmu_initialized())
241771ef 1205 return;
85cf9dba 1206
241771ef 1207 /*
c323d95f 1208 * Always use NMI for PMU
241771ef 1209 */
c323d95f 1210 apic_write(APIC_LVTPC, APIC_DM_NMI);
241771ef
IM
1211}
1212
4177c42a
RR
1213struct pmu_nmi_state {
1214 unsigned int marked;
1215 int handled;
1216};
1217
1218static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
1219
241771ef 1220static int __kprobes
cdd6c482 1221perf_event_nmi_handler(struct notifier_block *self,
241771ef
IM
1222 unsigned long cmd, void *__args)
1223{
1224 struct die_args *args = __args;
4177c42a
RR
1225 unsigned int this_nmi;
1226 int handled;
b0f3f28e 1227
cdd6c482 1228 if (!atomic_read(&active_events))
63a809a2
PZ
1229 return NOTIFY_DONE;
1230
b0f3f28e
PZ
1231 switch (cmd) {
1232 case DIE_NMI:
1233 case DIE_NMI_IPI:
1234 break;
4177c42a
RR
1235 case DIE_NMIUNKNOWN:
1236 this_nmi = percpu_read(irq_stat.__nmi_count);
1237 if (this_nmi != __get_cpu_var(pmu_nmi).marked)
1238 /* let the kernel handle the unknown nmi */
1239 return NOTIFY_DONE;
1240 /*
1241 * This one is a PMU back-to-back nmi. Two events
1242 * trigger 'simultaneously' raising two back-to-back
1243 * NMIs. If the first NMI handles both, the latter
1244 * will be empty and daze the CPU. So, we drop it to
1245 * avoid false-positive 'unknown nmi' messages.
1246 */
1247 return NOTIFY_STOP;
b0f3f28e 1248 default:
241771ef 1249 return NOTIFY_DONE;
b0f3f28e 1250 }
241771ef 1251
241771ef 1252 apic_write(APIC_LVTPC, APIC_DM_NMI);
4177c42a
RR
1253
1254 handled = x86_pmu.handle_irq(args->regs);
1255 if (!handled)
1256 return NOTIFY_DONE;
1257
1258 this_nmi = percpu_read(irq_stat.__nmi_count);
1259 if ((handled > 1) ||
1260 /* the next nmi could be a back-to-back nmi */
1261 ((__get_cpu_var(pmu_nmi).marked == this_nmi) &&
1262 (__get_cpu_var(pmu_nmi).handled > 1))) {
1263 /*
1264 * We could have two subsequent back-to-back nmis: The
1265 * first handles more than one counter, the 2nd
1266 * handles only one counter and the 3rd handles no
1267 * counter.
1268 *
1269 * This is the 2nd nmi because the previous was
1270 * handling more than one counter. We will mark the
1271 * next (3rd) and then drop it if unhandled.
1272 */
1273 __get_cpu_var(pmu_nmi).marked = this_nmi + 1;
1274 __get_cpu_var(pmu_nmi).handled = handled;
1275 }
241771ef 1276
a4016a79 1277 return NOTIFY_STOP;
241771ef
IM
1278}
1279
f22f54f4
PZ
1280static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1281 .notifier_call = perf_event_nmi_handler,
1282 .next = NULL,
1283 .priority = 1
1284};
1285
63b14649 1286static struct event_constraint unconstrained;
38331f62 1287static struct event_constraint emptyconstraint;
63b14649 1288
63b14649 1289static struct event_constraint *
f22f54f4 1290x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1da53e02 1291{
63b14649 1292 struct event_constraint *c;
1da53e02 1293
1da53e02
SE
1294 if (x86_pmu.event_constraints) {
1295 for_each_event_constraint(c, x86_pmu.event_constraints) {
63b14649
PZ
1296 if ((event->hw.config & c->cmask) == c->code)
1297 return c;
1da53e02
SE
1298 }
1299 }
63b14649
PZ
1300
1301 return &unconstrained;
1da53e02
SE
1302}
1303
f22f54f4
PZ
1304#include "perf_event_amd.c"
1305#include "perf_event_p6.c"
a072738e 1306#include "perf_event_p4.c"
caff2bef 1307#include "perf_event_intel_lbr.c"
ca037701 1308#include "perf_event_intel_ds.c"
f22f54f4 1309#include "perf_event_intel.c"
f87ad35d 1310
3f6da390
PZ
1311static int __cpuinit
1312x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1313{
1314 unsigned int cpu = (long)hcpu;
b38b24ea 1315 int ret = NOTIFY_OK;
3f6da390
PZ
1316
1317 switch (action & ~CPU_TASKS_FROZEN) {
1318 case CPU_UP_PREPARE:
1319 if (x86_pmu.cpu_prepare)
b38b24ea 1320 ret = x86_pmu.cpu_prepare(cpu);
3f6da390
PZ
1321 break;
1322
1323 case CPU_STARTING:
1324 if (x86_pmu.cpu_starting)
1325 x86_pmu.cpu_starting(cpu);
1326 break;
1327
1328 case CPU_DYING:
1329 if (x86_pmu.cpu_dying)
1330 x86_pmu.cpu_dying(cpu);
1331 break;
1332
b38b24ea 1333 case CPU_UP_CANCELED:
3f6da390
PZ
1334 case CPU_DEAD:
1335 if (x86_pmu.cpu_dead)
1336 x86_pmu.cpu_dead(cpu);
1337 break;
1338
1339 default:
1340 break;
1341 }
1342
b38b24ea 1343 return ret;
3f6da390
PZ
1344}
1345
12558038
CG
1346static void __init pmu_check_apic(void)
1347{
1348 if (cpu_has_apic)
1349 return;
1350
1351 x86_pmu.apic = 0;
1352 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1353 pr_info("no hardware sampling interrupt available.\n");
1354}
1355
cdd6c482 1356void __init init_hw_perf_events(void)
b56a3802 1357{
b622d644 1358 struct event_constraint *c;
72eae04d
RR
1359 int err;
1360
cdd6c482 1361 pr_info("Performance Events: ");
1123e3ad 1362
b56a3802
JSR
1363 switch (boot_cpu_data.x86_vendor) {
1364 case X86_VENDOR_INTEL:
72eae04d 1365 err = intel_pmu_init();
b56a3802 1366 break;
f87ad35d 1367 case X86_VENDOR_AMD:
72eae04d 1368 err = amd_pmu_init();
f87ad35d 1369 break;
4138960a
RR
1370 default:
1371 return;
b56a3802 1372 }
1123e3ad 1373 if (err != 0) {
cdd6c482 1374 pr_cont("no PMU driver, software events only.\n");
b56a3802 1375 return;
1123e3ad 1376 }
b56a3802 1377
12558038
CG
1378 pmu_check_apic();
1379
33c6d6a7
DZ
1380 /* sanity check that the hardware exists or is emulated */
1381 if (!check_hw_exists()) {
1382 pr_cont("Broken PMU hardware detected, software events only.\n");
1383 return;
1384 }
1385
1123e3ad 1386 pr_cont("%s PMU driver.\n", x86_pmu.name);
faa28ae0 1387
3c44780b
PZ
1388 if (x86_pmu.quirks)
1389 x86_pmu.quirks();
1390
948b1bb8 1391 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
cdd6c482 1392 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
948b1bb8
RR
1393 x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
1394 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
241771ef 1395 }
948b1bb8 1396 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
241771ef 1397
948b1bb8 1398 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
cdd6c482 1399 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
948b1bb8
RR
1400 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
1401 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
703e937c 1402 }
862a1a5f 1403
d6dc0b4e 1404 x86_pmu.intel_ctrl |=
948b1bb8 1405 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
241771ef 1406
cdd6c482
IM
1407 perf_events_lapic_init();
1408 register_die_notifier(&perf_event_nmi_notifier);
1123e3ad 1409
63b14649 1410 unconstrained = (struct event_constraint)
948b1bb8
RR
1411 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
1412 0, x86_pmu.num_counters);
63b14649 1413
b622d644
PZ
1414 if (x86_pmu.event_constraints) {
1415 for_each_event_constraint(c, x86_pmu.event_constraints) {
a098f448 1416 if (c->cmask != X86_RAW_EVENT_MASK)
b622d644
PZ
1417 continue;
1418
948b1bb8
RR
1419 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
1420 c->weight += x86_pmu.num_counters;
b622d644
PZ
1421 }
1422 }
1423
57c0c15b 1424 pr_info("... version: %d\n", x86_pmu.version);
948b1bb8
RR
1425 pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
1426 pr_info("... generic registers: %d\n", x86_pmu.num_counters);
1427 pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
57c0c15b 1428 pr_info("... max period: %016Lx\n", x86_pmu.max_period);
948b1bb8 1429 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
d6dc0b4e 1430 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
3f6da390 1431
b0a873eb 1432 perf_pmu_register(&pmu);
3f6da390 1433 perf_cpu_notifier(x86_pmu_notifier);
241771ef 1434}
621a01ea 1435
cdd6c482 1436static inline void x86_pmu_read(struct perf_event *event)
ee06094f 1437{
cc2ad4ba 1438 x86_perf_event_update(event);
ee06094f
IM
1439}
1440
4d1c52b0
LM
1441/*
1442 * Start group events scheduling transaction
1443 * Set the flag to make pmu::enable() not perform the
1444 * schedulability test, it will be performed at commit time
1445 */
51b0fe39 1446static void x86_pmu_start_txn(struct pmu *pmu)
4d1c52b0
LM
1447{
1448 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1449
33696fc0 1450 perf_pmu_disable(pmu);
8d2cacbb 1451 cpuc->group_flag |= PERF_EVENT_TXN;
90151c35 1452 cpuc->n_txn = 0;
4d1c52b0
LM
1453}
1454
1455/*
1456 * Stop group events scheduling transaction
1457 * Clear the flag and pmu::enable() will perform the
1458 * schedulability test.
1459 */
51b0fe39 1460static void x86_pmu_cancel_txn(struct pmu *pmu)
4d1c52b0
LM
1461{
1462 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1463
8d2cacbb 1464 cpuc->group_flag &= ~PERF_EVENT_TXN;
90151c35
SE
1465 /*
1466 * Truncate the collected events.
1467 */
1468 cpuc->n_added -= cpuc->n_txn;
1469 cpuc->n_events -= cpuc->n_txn;
33696fc0 1470 perf_pmu_enable(pmu);
4d1c52b0
LM
1471}
1472
1473/*
1474 * Commit group events scheduling transaction
1475 * Perform the group schedulability test as a whole
1476 * Return 0 if success
1477 */
51b0fe39 1478static int x86_pmu_commit_txn(struct pmu *pmu)
4d1c52b0
LM
1479{
1480 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1481 int assign[X86_PMC_IDX_MAX];
1482 int n, ret;
1483
1484 n = cpuc->n_events;
1485
1486 if (!x86_pmu_initialized())
1487 return -EAGAIN;
1488
1489 ret = x86_pmu.schedule_events(cpuc, n, assign);
1490 if (ret)
1491 return ret;
1492
1493 /*
 1494 * copy the new assignment now that we know it is possible;
 1495 * it will be used by hw_perf_enable()
1496 */
1497 memcpy(cpuc->assign, assign, n*sizeof(int));
1498
8d2cacbb 1499 cpuc->group_flag &= ~PERF_EVENT_TXN;
33696fc0 1500 perf_pmu_enable(pmu);
4d1c52b0
LM
1501 return 0;
1502}
1503
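/*
 * Transaction usage, roughly as the perf core drives it when scheduling an
 * event group (illustrative sketch, not literal core code):
 *
 *	pmu->start_txn(pmu);
 *	for each group member:
 *		pmu->add(event, flags);    (collect only, no sched test)
 *	if (pmu->commit_txn(pmu))          (single schedulability test)
 *		pmu->cancel_txn(pmu);      (roll back n_added / n_events)
 */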
ca037701
PZ
1504/*
1505 * validate that we can schedule this event
1506 */
1507static int validate_event(struct perf_event *event)
1508{
1509 struct cpu_hw_events *fake_cpuc;
1510 struct event_constraint *c;
1511 int ret = 0;
1512
1513 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
1514 if (!fake_cpuc)
1515 return -ENOMEM;
1516
1517 c = x86_pmu.get_event_constraints(fake_cpuc, event);
1518
1519 if (!c || !c->weight)
1520 ret = -ENOSPC;
1521
1522 if (x86_pmu.put_event_constraints)
1523 x86_pmu.put_event_constraints(fake_cpuc, event);
1524
1525 kfree(fake_cpuc);
1526
1527 return ret;
1528}
1529
1da53e02
SE
1530/*
1531 * validate a single event group
1532 *
1533 * validation include:
184f412c
IM
 1534 * - check events are compatible with each other
1535 * - events do not compete for the same counter
1536 * - number of events <= number of counters
1da53e02
SE
1537 *
1538 * validation ensures the group can be loaded onto the
1539 * PMU if it was the only group available.
1540 */
fe9081cc
PZ
1541static int validate_group(struct perf_event *event)
1542{
1da53e02 1543 struct perf_event *leader = event->group_leader;
502568d5
PZ
1544 struct cpu_hw_events *fake_cpuc;
1545 int ret, n;
fe9081cc 1546
502568d5
PZ
1547 ret = -ENOMEM;
1548 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
1549 if (!fake_cpuc)
1550 goto out;
fe9081cc 1551
1da53e02
SE
1552 /*
1553 * the event is not yet connected with its
1554 * siblings therefore we must first collect
1555 * existing siblings, then add the new event
1556 * before we can simulate the scheduling
1557 */
502568d5
PZ
1558 ret = -ENOSPC;
1559 n = collect_events(fake_cpuc, leader, true);
1da53e02 1560 if (n < 0)
502568d5 1561 goto out_free;
fe9081cc 1562
502568d5
PZ
1563 fake_cpuc->n_events = n;
1564 n = collect_events(fake_cpuc, event, false);
1da53e02 1565 if (n < 0)
502568d5 1566 goto out_free;
fe9081cc 1567
502568d5 1568 fake_cpuc->n_events = n;
1da53e02 1569
a072738e 1570 ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
502568d5
PZ
1571
1572out_free:
1573 kfree(fake_cpuc);
1574out:
1575 return ret;
fe9081cc
PZ
1576}
1577
b0a873eb 1578int x86_pmu_event_init(struct perf_event *event)
621a01ea 1579{
51b0fe39 1580 struct pmu *tmp;
621a01ea
IM
1581 int err;
1582
b0a873eb
PZ
1583 switch (event->attr.type) {
1584 case PERF_TYPE_RAW:
1585 case PERF_TYPE_HARDWARE:
1586 case PERF_TYPE_HW_CACHE:
1587 break;
1588
1589 default:
1590 return -ENOENT;
1591 }
1592
1593 err = __x86_pmu_event_init(event);
fe9081cc 1594 if (!err) {
8113070d
SE
1595 /*
1596 * we temporarily connect event to its pmu
1597 * such that validate_group() can classify
1598 * it as an x86 event using is_x86_event()
1599 */
1600 tmp = event->pmu;
1601 event->pmu = &pmu;
1602
fe9081cc
PZ
1603 if (event->group_leader != event)
1604 err = validate_group(event);
ca037701
PZ
1605 else
1606 err = validate_event(event);
8113070d
SE
1607
1608 event->pmu = tmp;
fe9081cc 1609 }
a1792cda 1610 if (err) {
cdd6c482
IM
1611 if (event->destroy)
1612 event->destroy(event);
a1792cda 1613 }
621a01ea 1614
b0a873eb 1615 return err;
621a01ea 1616}
d7d59fb3 1617
b0a873eb 1618static struct pmu pmu = {
a4eaf7f1
PZ
1619 .pmu_enable = x86_pmu_enable,
1620 .pmu_disable = x86_pmu_disable,
1621
b0a873eb 1622 .event_init = x86_pmu_event_init,
a4eaf7f1
PZ
1623
1624 .add = x86_pmu_add,
1625 .del = x86_pmu_del,
b0a873eb
PZ
1626 .start = x86_pmu_start,
1627 .stop = x86_pmu_stop,
1628 .read = x86_pmu_read,
a4eaf7f1 1629
b0a873eb
PZ
1630 .start_txn = x86_pmu_start_txn,
1631 .cancel_txn = x86_pmu_cancel_txn,
1632 .commit_txn = x86_pmu_commit_txn,
1633};
1634
d7d59fb3
PZ
1635/*
1636 * callchain support
1637 */
1638
d7d59fb3
PZ
1639static void
1640backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
1641{
1642 /* Ignore warnings */
1643}
1644
1645static void backtrace_warning(void *data, char *msg)
1646{
1647 /* Ignore warnings */
1648}
1649
1650static int backtrace_stack(void *data, char *name)
1651{
038e836e 1652 return 0;
d7d59fb3
PZ
1653}
1654
1655static void backtrace_address(void *data, unsigned long addr, int reliable)
1656{
1657 struct perf_callchain_entry *entry = data;
1658
70791ce9 1659 perf_callchain_store(entry, addr);
d7d59fb3
PZ
1660}
1661
1662static const struct stacktrace_ops backtrace_ops = {
1663 .warning = backtrace_warning,
1664 .warning_symbol = backtrace_warning_symbol,
1665 .stack = backtrace_stack,
1666 .address = backtrace_address,
06d65bda 1667 .walk_stack = print_context_stack_bp,
d7d59fb3
PZ
1668};
1669
56962b44
FW
1670void
1671perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
d7d59fb3 1672{
927c7a9e
FW
1673 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1674 /* TODO: We don't support guest os callchain now */
ed805261 1675 return;
927c7a9e
FW
1676 }
1677
70791ce9 1678 perf_callchain_store(entry, regs->ip);
d7d59fb3 1679
9c0729dc 1680 dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
d7d59fb3
PZ
1681}
1682
257ef9d2
TE
1683#ifdef CONFIG_COMPAT
1684static inline int
1685perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
74193ef0 1686{
257ef9d2
TE
1687 /* 32-bit process in 64-bit kernel. */
1688 struct stack_frame_ia32 frame;
1689 const void __user *fp;
74193ef0 1690
257ef9d2
TE
1691 if (!test_thread_flag(TIF_IA32))
1692 return 0;
1693
1694 fp = compat_ptr(regs->bp);
1695 while (entry->nr < PERF_MAX_STACK_DEPTH) {
1696 unsigned long bytes;
1697 frame.next_frame = 0;
1698 frame.return_address = 0;
1699
1700 bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
1701 if (bytes != sizeof(frame))
1702 break;
74193ef0 1703
257ef9d2
TE
1704 if (fp < compat_ptr(regs->sp))
1705 break;
74193ef0 1706
70791ce9 1707 perf_callchain_store(entry, frame.return_address);
257ef9d2
TE
1708 fp = compat_ptr(frame.next_frame);
1709 }
1710 return 1;
d7d59fb3 1711}
257ef9d2
TE
1712#else
1713static inline int
1714perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1715{
1716 return 0;
1717}
1718#endif
d7d59fb3 1719
56962b44
FW
1720void
1721perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
d7d59fb3
PZ
1722{
1723 struct stack_frame frame;
1724 const void __user *fp;
1725
927c7a9e
FW
1726 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1727 /* TODO: We don't support guest os callchain now */
ed805261 1728 return;
927c7a9e 1729 }
5a6cec3a 1730
74193ef0 1731 fp = (void __user *)regs->bp;
d7d59fb3 1732
70791ce9 1733 perf_callchain_store(entry, regs->ip);
d7d59fb3 1734
257ef9d2
TE
1735 if (perf_callchain_user32(regs, entry))
1736 return;
1737
f9188e02 1738 while (entry->nr < PERF_MAX_STACK_DEPTH) {
257ef9d2 1739 unsigned long bytes;
038e836e 1740 frame.next_frame = NULL;
d7d59fb3
PZ
1741 frame.return_address = 0;
1742
257ef9d2
TE
1743 bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
1744 if (bytes != sizeof(frame))
d7d59fb3
PZ
1745 break;
1746
5a6cec3a 1747 if ((unsigned long)fp < regs->sp)
d7d59fb3
PZ
1748 break;
1749
70791ce9 1750 perf_callchain_store(entry, frame.return_address);
038e836e 1751 fp = frame.next_frame;
d7d59fb3
PZ
1752 }
1753}
1754
39447b38
ZY
1755unsigned long perf_instruction_pointer(struct pt_regs *regs)
1756{
1757 unsigned long ip;
dcf46b94 1758
39447b38
ZY
1759 if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
1760 ip = perf_guest_cbs->get_guest_ip();
1761 else
1762 ip = instruction_pointer(regs);
dcf46b94 1763
39447b38
ZY
1764 return ip;
1765}
1766
1767unsigned long perf_misc_flags(struct pt_regs *regs)
1768{
1769 int misc = 0;
dcf46b94 1770
39447b38 1771 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
dcf46b94
ZY
1772 if (perf_guest_cbs->is_user_mode())
1773 misc |= PERF_RECORD_MISC_GUEST_USER;
1774 else
1775 misc |= PERF_RECORD_MISC_GUEST_KERNEL;
1776 } else {
1777 if (user_mode(regs))
1778 misc |= PERF_RECORD_MISC_USER;
1779 else
1780 misc |= PERF_RECORD_MISC_KERNEL;
1781 }
1782
39447b38 1783 if (regs->flags & PERF_EFLAGS_EXACT)
ab608344 1784 misc |= PERF_RECORD_MISC_EXACT_IP;
39447b38
ZY
1785
1786 return misc;
1787}