/*
 * Performance events x86 architecture code
 *
 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 * Copyright (C) 2009 Jaswinder Singh Rajput
 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
 * Copyright (C) 2009 Google, Inc., Stephane Eranian
 *
 * For licencing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <linux/cpu.h>
#include <linux/bitops.h>

#include <asm/apic.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>
#include <asm/compat.h>

#if 0
#undef wrmsrl
#define wrmsrl(msr, val)					\
do {								\
	trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
			(unsigned long)(val));			\
	native_write_msr((msr), (u32)((u64)(val)),		\
			(u32)((u64)(val) >> 32));		\
} while (0)
#endif

/*
 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
 */
static unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
	unsigned long offset, addr = (unsigned long)from;
	int type = in_nmi() ? KM_NMI : KM_IRQ0;
	unsigned long size, len = 0;
	struct page *page;
	void *map;
	int ret;

	do {
		ret = __get_user_pages_fast(addr, 1, 0, &page);
		if (!ret)
			break;

		offset = addr & (PAGE_SIZE - 1);
		size = min(PAGE_SIZE - offset, n - len);

		map = kmap_atomic(page, type);
		memcpy(to, map+offset, size);
		kunmap_atomic(map, type);
		put_page(page);

		len += size;
		to += size;
		addr += size;

	} while (len < n);

	return len;
}

struct event_constraint {
	union {
		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
		u64		idxmsk64;
	};
	u64	code;
	u64	cmask;
	int	weight;
};

struct amd_nb {
	int nb_id;  /* NorthBridge id */
	int refcnt; /* reference count */
	struct perf_event *owners[X86_PMC_IDX_MAX];
	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};

#define MAX_LBR_ENTRIES		16

struct cpu_hw_events {
	/*
	 * Generic x86 PMC bits
	 */
	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int			enabled;

	int			n_events;
	int			n_added;
	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
	u64			tags[X86_PMC_IDX_MAX];
	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */

	/*
	 * Intel DebugStore bits
	 */
	struct debug_store	*ds;
	u64			pebs_enabled;

	/*
	 * Intel LBR bits
	 */
	int				lbr_users;
	void				*lbr_context;
	struct perf_branch_stack	lbr_stack;
	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];

	/*
	 * AMD specific bits
	 */
	struct amd_nb		*amd_nb;
};

#define __EVENT_CONSTRAINT(c, n, m, w) {\
	{ .idxmsk64 = (n) },		\
	.code = (c),			\
	.cmask = (m),			\
	.weight = (w),			\
}

#define EVENT_CONSTRAINT(c, n, m)	\
	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))

/*
 * Constraint on the Event code.
 */
#define INTEL_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)

/*
 * Constraint on the Event code + UMask + fixed-mask
 *
 * filter mask to validate fixed counter events.
 * the following filters disqualify for fixed counters:
 *  - inv
 *  - edge
 *  - cnt-mask
 *  The other filters are supported by fixed counters.
 *  The any-thread option is supported starting with v3.
 */
#define FIXED_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)

/*
 * Constraint on the Event code + UMask
 */
#define PEBS_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)

#define EVENT_CONSTRAINT_END		\
	EVENT_CONSTRAINT(0, 0, 0)

#define for_each_event_constraint(e, c)	\
	for ((e) = (c); (e)->cmask; (e)++)

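/*
 * Illustrative sketch only (the event codes below are made up, not taken
 * from a real table): a model-specific PMU file such as perf_event_intel.c,
 * included further down, is expected to build its constraint tables out of
 * the macros above, terminated by the cmask == 0 entry that
 * EVENT_CONSTRAINT_END provides and that for_each_event_constraint() stops on.
 */
#if 0
static struct event_constraint example_event_constraints[] =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* may only use fixed counter 0 */
	INTEL_EVENT_CONSTRAINT(0x12, 0x3),	/* may use generic counters 0-1 */
	EVENT_CONSTRAINT_END
};
#endif
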
union perf_capabilities {
	struct {
		u64	lbr_format : 6;
		u64	pebs_trap : 1;
		u64	pebs_arch_reg : 1;
		u64	pebs_format : 4;
		u64	smm_freeze : 1;
	};
	u64	capabilities;
};

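/*
 * This union is assumed to mirror the bit layout of the IA32_PERF_CAPABILITIES
 * MSR; the vendor-specific init code (intel_pmu_init() and friends, included
 * below) is expected to fill it in with something like
 *
 *	rdmsrl(MSR_IA32_PERF_CAPABILITIES, x86_pmu.intel_cap.capabilities);
 *
 * so that the Intel DS/PEBS and LBR code can key off pebs_format and
 * lbr_format.
 */
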
/*
 * struct x86_pmu - generic x86 pmu
 */
struct x86_pmu {
	/*
	 * Generic x86 PMC bits
	 */
	const char	*name;
	int		version;
	int		(*handle_irq)(struct pt_regs *);
	void		(*disable_all)(void);
	void		(*enable_all)(int added);
	void		(*enable)(struct perf_event *);
	void		(*disable)(struct perf_event *);
	int		(*hw_config)(struct perf_event *event);
	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
	unsigned	eventsel;
	unsigned	perfctr;
	u64		(*event_map)(int);
	int		max_events;
	int		num_counters;
	int		num_counters_fixed;
	int		cntval_bits;
	u64		cntval_mask;
	int		apic;
	u64		max_period;
	struct event_constraint *
			(*get_event_constraints)(struct cpu_hw_events *cpuc,
						 struct perf_event *event);

	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
						 struct perf_event *event);
	struct event_constraint *event_constraints;
	void		(*quirks)(void);

	int		(*cpu_prepare)(int cpu);
	void		(*cpu_starting)(int cpu);
	void		(*cpu_dying)(int cpu);
	void		(*cpu_dead)(int cpu);

	/*
	 * Intel Arch Perfmon v2+
	 */
	u64			intel_ctrl;
	union perf_capabilities intel_cap;

	/*
	 * Intel DebugStore bits
	 */
	int		bts, pebs;
	int		pebs_record_size;
	void		(*drain_pebs)(struct pt_regs *regs);
	struct event_constraint *pebs_constraints;

	/*
	 * Intel LBR
	 */
	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs */
	int		lbr_nr;			   /* hardware stack size */
};

static struct x86_pmu x86_pmu __read_mostly;

static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
	.enabled = 1,
};

static int x86_perf_event_set_period(struct perf_event *event);

/*
 * Generalized hw caching related hw_event table, filled
 * in on a per model basis. A value of 0 means
 * 'not supported', -1 means 'hw_event makes no sense on
 * this CPU', any other value means the raw hw_event
 * ID.
 */

#define C(x) PERF_COUNT_HW_CACHE_##x

static u64 __read_mostly hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];

/*
 * Propagate event elapsed time into the generic event.
 * Can only be executed on the CPU where the event is active.
 * Returns the delta events processed.
 */
static u64
x86_perf_event_update(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - x86_pmu.cntval_bits;
	u64 prev_raw_count, new_raw_count;
	int idx = hwc->idx;
	s64 delta;

	if (idx == X86_PMC_IDX_FIXED_BTS)
		return 0;

	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	rdmsrl(hwc->event_base + idx, new_raw_count);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
					new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	atomic64_add(delta, &event->count);
	atomic64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

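/*
 * Worked example of the shift trick above (illustrative numbers): with
 * cntval_bits == 48 the shift is 16.  If the 48-bit counter moved from
 * prev_raw_count == 0xffffffffff00 to new_raw_count == 0x000000000100,
 * then
 *
 *	delta = ((new << 16) - (prev << 16)) >> 16 = 0x200
 *
 * the 64-bit subtraction wraps in the upper bits and the arithmetic right
 * shift of the s64 recovers the true 48-bit delta of 0x200 events.
 */
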
static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);

#ifdef CONFIG_X86_LOCAL_APIC

static bool reserve_pmc_hardware(void)
{
	int i;

	if (nmi_watchdog == NMI_LOCAL_APIC)
		disable_lapic_nmi_watchdog();

	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
			goto perfctr_fail;
	}

	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
			goto eventsel_fail;
	}

	return true;

eventsel_fail:
	for (i--; i >= 0; i--)
		release_evntsel_nmi(x86_pmu.eventsel + i);

	i = x86_pmu.num_counters;

perfctr_fail:
	for (i--; i >= 0; i--)
		release_perfctr_nmi(x86_pmu.perfctr + i);

	if (nmi_watchdog == NMI_LOCAL_APIC)
		enable_lapic_nmi_watchdog();

	return false;
}

static void release_pmc_hardware(void)
{
	int i;

	for (i = 0; i < x86_pmu.num_counters; i++) {
		release_perfctr_nmi(x86_pmu.perfctr + i);
		release_evntsel_nmi(x86_pmu.eventsel + i);
	}

	if (nmi_watchdog == NMI_LOCAL_APIC)
		enable_lapic_nmi_watchdog();
}

#else

static bool reserve_pmc_hardware(void) { return true; }
static void release_pmc_hardware(void) {}

#endif

static int reserve_ds_buffers(void);
static void release_ds_buffers(void);

static void hw_perf_event_destroy(struct perf_event *event)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
		release_pmc_hardware();
		release_ds_buffers();
		mutex_unlock(&pmc_reserve_mutex);
	}
}

static inline int x86_pmu_initialized(void)
{
	return x86_pmu.handle_irq != NULL;
}

static inline int
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
{
	unsigned int cache_type, cache_op, cache_result;
	u64 config, val;

	config = attr->config;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	val = hw_cache_event_ids[cache_type][cache_op][cache_result];

	if (val == 0)
		return -ENOENT;

	if (val == -1)
		return -EINVAL;

	hwc->config |= val;

	return 0;
}

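/*
 * The attr->config layout decoded above follows the generic perf ABI:
 * cache id in bits 0-7, operation in bits 8-15, result in bits 16-23.
 * For example, an L1D read-miss event would be requested with
 *
 *	attr.config = PERF_COUNT_HW_CACHE_L1D |
 *		      (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *		      (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
 *
 * and translated through hw_cache_event_ids[][][], which the
 * model-specific init code fills in.
 */
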
static int x86_pmu_hw_config(struct perf_event *event)
{
	/*
	 * Generate PMC IRQs:
	 * (keep 'enabled' bit clear for now)
	 */
	event->hw.config = ARCH_PERFMON_EVENTSEL_INT;

	/*
	 * Count user and OS events unless requested not to
	 */
	if (!event->attr.exclude_user)
		event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
	if (!event->attr.exclude_kernel)
		event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;

	if (event->attr.type == PERF_TYPE_RAW)
		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;

	return 0;
}

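/*
 * Example of the resulting config (illustrative): a PERF_TYPE_RAW event
 * with attr->config == 0x412e (event 0x2e, umask 0x41, the usual LLC-miss
 * encoding) and no exclude_* bits ends up with
 *
 *	hw.config == ARCH_PERFMON_EVENTSEL_INT | ARCH_PERFMON_EVENTSEL_USR |
 *		     ARCH_PERFMON_EVENTSEL_OS | 0x412e
 *
 * The enable bit stays clear here; __x86_pmu_enable_event() ORs it in when
 * the event is actually started.
 */
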
/*
 * Setup the hardware configuration for a given attr_type
 */
static int __hw_perf_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	u64 config;
	int err;

	if (!x86_pmu_initialized())
		return -ENODEV;

	err = 0;
	if (!atomic_inc_not_zero(&active_events)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&active_events) == 0) {
			if (!reserve_pmc_hardware())
				err = -EBUSY;
			else {
				err = reserve_ds_buffers();
				if (err)
					release_pmc_hardware();
			}
		}
		if (!err)
			atomic_inc(&active_events);
		mutex_unlock(&pmc_reserve_mutex);
	}
	if (err)
		return err;

	event->destroy = hw_perf_event_destroy;

	hwc->idx = -1;
	hwc->last_cpu = -1;
	hwc->last_tag = ~0ULL;

	/* Processor specifics */
	err = x86_pmu.hw_config(event);
	if (err)
		return err;

	if (!hwc->sample_period) {
		hwc->sample_period = x86_pmu.max_period;
		hwc->last_period = hwc->sample_period;
		atomic64_set(&hwc->period_left, hwc->sample_period);
	} else {
		/*
		 * If we have a PMU initialized but no APIC
		 * interrupts, we cannot sample hardware
		 * events (user-space has to fall back and
		 * sample via a hrtimer based software event):
		 */
		if (!x86_pmu.apic)
			return -EOPNOTSUPP;
	}

	if (attr->type == PERF_TYPE_RAW)
		return 0;

	if (attr->type == PERF_TYPE_HW_CACHE)
		return set_ext_hw_attr(hwc, attr);

	if (attr->config >= x86_pmu.max_events)
		return -EINVAL;

	/*
	 * The generic map:
	 */
	config = x86_pmu.event_map(attr->config);

	if (config == 0)
		return -ENOENT;

	if (config == -1LL)
		return -EINVAL;

	/*
	 * Branch tracing:
	 */
	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
	    (hwc->sample_period == 1)) {
		/* BTS is not supported by this architecture. */
		if (!x86_pmu.bts)
			return -EOPNOTSUPP;

		/* BTS is currently only allowed for user-mode. */
		if (!attr->exclude_kernel)
			return -EOPNOTSUPP;
	}

	hwc->config |= config;

	return 0;
}

static void x86_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		rdmsrl(x86_pmu.eventsel + idx, val);
		if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
			continue;
		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(x86_pmu.eventsel + idx, val);
	}
}

void hw_perf_disable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!x86_pmu_initialized())
		return;

	if (!cpuc->enabled)
		return;

	cpuc->n_added = 0;
	cpuc->enabled = 0;
	barrier();

	x86_pmu.disable_all();
}

static void x86_pmu_enable_all(int added)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct perf_event *event = cpuc->events[idx];
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		val = event->hw.config;
		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(x86_pmu.eventsel + idx, val);
	}
}

static const struct pmu pmu;

static inline int is_x86_event(struct perf_event *event)
{
	return event->pmu == &pmu;
}

static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int i, j, w, wmax, num = 0;
	struct hw_perf_event *hwc;

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);

	for (i = 0; i < n; i++) {
		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
		constraints[i] = c;
	}

	/*
	 * fastpath, try to reuse previous register
	 */
	for (i = 0; i < n; i++) {
		hwc = &cpuc->event_list[i]->hw;
		c = constraints[i];

		/* never assigned */
		if (hwc->idx == -1)
			break;

		/* constraint still honored */
		if (!test_bit(hwc->idx, c->idxmsk))
			break;

		/* not already used */
		if (test_bit(hwc->idx, used_mask))
			break;

		__set_bit(hwc->idx, used_mask);
		if (assign)
			assign[i] = hwc->idx;
	}
	if (i == n)
		goto done;

	/*
	 * begin slow path
	 */

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);

	/*
	 * weight = number of possible counters
	 *
	 * 1    = most constrained, only works on one counter
	 * wmax = least constrained, works on any counter
	 *
	 * assign events to counters starting with most
	 * constrained events.
	 */
	wmax = x86_pmu.num_counters;

	/*
	 * when fixed event counters are present,
	 * wmax is incremented by 1 to account
	 * for one more choice
	 */
	if (x86_pmu.num_counters_fixed)
		wmax++;

	for (w = 1, num = n; num && w <= wmax; w++) {
		/* for each event */
		for (i = 0; num && i < n; i++) {
			c = constraints[i];
			hwc = &cpuc->event_list[i]->hw;

			if (c->weight != w)
				continue;

			for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
				if (!test_bit(j, used_mask))
					break;
			}

			if (j == X86_PMC_IDX_MAX)
				break;

			__set_bit(j, used_mask);

			if (assign)
				assign[i] = j;
			num--;
		}
	}
done:
	/*
	 * scheduling failed or is just a simulation,
	 * free resources if necessary
	 */
	if (!assign || num) {
		for (i = 0; i < n; i++) {
			if (x86_pmu.put_event_constraints)
				x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
		}
	}
	return num ? -ENOSPC : 0;
}

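/*
 * Illustrative walk-through of the scheduler above: with a 4-counter PMU,
 * one event constrained to counter 0 only (weight 1) and one event that can
 * run anywhere (weight 4), the w == 1 pass places the constrained event on
 * counter 0 and the w == 4 pass then puts the unconstrained event on
 * counter 1.  Scheduling the less constrained event first could have taken
 * counter 0 and made the constrained event unschedulable, which is why
 * assignment proceeds in order of increasing weight.
 */
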
/*
 * dogrp: true if we must collect sibling events (group)
 * returns total number of events and error code
 */
static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
{
	struct perf_event *event;
	int n, max_count;

	max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;

	/* current number of events already accepted */
	n = cpuc->n_events;

	if (is_x86_event(leader)) {
		if (n >= max_count)
			return -ENOSPC;
		cpuc->event_list[n] = leader;
		n++;
	}
	if (!dogrp)
		return n;

	list_for_each_entry(event, &leader->sibling_list, group_entry) {
		if (!is_x86_event(event) ||
		    event->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (n >= max_count)
			return -ENOSPC;

		cpuc->event_list[n] = event;
		n++;
	}
	return n;
}

static inline void x86_assign_hw_event(struct perf_event *event,
				struct cpu_hw_events *cpuc, int i)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = cpuc->assign[i];
	hwc->last_cpu = smp_processor_id();
	hwc->last_tag = ++cpuc->tags[i];

	if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
		hwc->config_base = 0;
		hwc->event_base = 0;
	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		/*
		 * We set it so that event_base + idx in wrmsr/rdmsr maps to
		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
		 */
		hwc->event_base =
			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
	} else {
		hwc->config_base = x86_pmu.eventsel;
		hwc->event_base = x86_pmu.perfctr;
	}
}

static inline int match_prev_assignment(struct hw_perf_event *hwc,
					struct cpu_hw_events *cpuc,
					int i)
{
	return hwc->idx == cpuc->assign[i] &&
		hwc->last_cpu == smp_processor_id() &&
		hwc->last_tag == cpuc->tags[i];
}

static int x86_pmu_start(struct perf_event *event);
static void x86_pmu_stop(struct perf_event *event);

void hw_perf_enable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int i, added = cpuc->n_added;

	if (!x86_pmu_initialized())
		return;

	if (cpuc->enabled)
		return;

	if (cpuc->n_added) {
		int n_running = cpuc->n_events - cpuc->n_added;
		/*
		 * apply assignment obtained either from
		 * hw_perf_group_sched_in() or x86_pmu_enable()
		 *
		 * step1: save events moving to new counters
		 * step2: reprogram moved events into new counters
		 */
		for (i = 0; i < n_running; i++) {
			event = cpuc->event_list[i];
			hwc = &event->hw;

			/*
			 * we can avoid reprogramming counter if:
			 * - assigned same counter as last time
			 * - running on same CPU as last time
			 * - no other event has used the counter since
			 */
			if (hwc->idx == -1 ||
			    match_prev_assignment(hwc, cpuc, i))
				continue;

			x86_pmu_stop(event);
		}

		for (i = 0; i < cpuc->n_events; i++) {
			event = cpuc->event_list[i];
			hwc = &event->hw;

			if (!match_prev_assignment(hwc, cpuc, i))
				x86_assign_hw_event(event, cpuc, i);
			else if (i < n_running)
				continue;

			x86_pmu_start(event);
		}
		cpuc->n_added = 0;
		perf_events_lapic_init();
	}

	cpuc->enabled = 1;
	barrier();

	x86_pmu.enable_all(added);
}

static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
{
	wrmsrl(hwc->config_base + hwc->idx,
			      hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
}

static inline void x86_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	wrmsrl(hwc->config_base + hwc->idx, hwc->config);
}

static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);

/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the event disabled in hw:
 */
static int
x86_perf_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = atomic64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0, idx = hwc->idx;

	if (idx == X86_PMC_IDX_FIXED_BTS)
		return 0;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}
	/*
	 * Quirk: certain CPUs don't like it if just 1 hw_event is left:
	 */
	if (unlikely(left < 2))
		left = 2;

	if (left > x86_pmu.max_period)
		left = x86_pmu.max_period;

	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;

	/*
	 * The hw event starts counting from this event offset,
	 * mark it to be able to extract future deltas:
	 */
	atomic64_set(&hwc->prev_count, (u64)-left);

	wrmsrl(hwc->event_base + idx,
			(u64)(-left) & x86_pmu.cntval_mask);

	perf_event_update_userpage(event);

	return ret;
}

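/*
 * Example with illustrative numbers: for a sample_period of 100000 on a
 * 48-bit counter, the counter MSR is programmed with
 * (-100000) & cntval_mask == 0xfffffffe7960, so it overflows and raises
 * the PMI after 100000 increments.  prev_count is set to the matching
 * sign-extended value, so the next x86_perf_event_update() computes a
 * delta of roughly 100000 (plus whatever accumulated before the NMI was
 * serviced).
 */
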
static void x86_pmu_enable_event(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	if (cpuc->enabled)
		__x86_pmu_enable_event(&event->hw);
}

/*
 * activate a single event
 *
 * The event is added to the group of enabled events
 * but only if it can be scheduled with existing events.
 *
 * Called with PMU disabled. If successful and return value 1,
 * then guaranteed to call perf_enable() and hw_perf_enable()
 */
static int x86_pmu_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc;
	int assign[X86_PMC_IDX_MAX];
	int n, n0, ret;

	hwc = &event->hw;

	n0 = cpuc->n_events;
	n = collect_events(cpuc, event, false);
	if (n < 0)
		return n;

	ret = x86_pmu.schedule_events(cpuc, n, assign);
	if (ret)
		return ret;
	/*
	 * copy new assignment, now we know it is possible
	 * will be used by hw_perf_enable()
	 */
	memcpy(cpuc->assign, assign, n*sizeof(int));

	cpuc->n_events = n;
	cpuc->n_added += n - n0;

	return 0;
}

static int x86_pmu_start(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx = event->hw.idx;

	if (idx == -1)
		return -EAGAIN;

	x86_perf_event_set_period(event);
	cpuc->events[idx] = event;
	__set_bit(idx, cpuc->active_mask);
	x86_pmu.enable(event);
	perf_event_update_userpage(event);

	return 0;
}

static void x86_pmu_unthrottle(struct perf_event *event)
{
	int ret = x86_pmu_start(event);
	WARN_ON_ONCE(ret);
}

void perf_event_print_debug(void)
{
	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
	u64 pebs;
	struct cpu_hw_events *cpuc;
	unsigned long flags;
	int cpu, idx;

	if (!x86_pmu.num_counters)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_events, cpu);

	if (x86_pmu.version >= 2) {
		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
		rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);

		pr_info("\n");
		pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
		pr_info("CPU#%d: status: %016llx\n", cpu, status);
		pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
		pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
		pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
	}
	pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
		rdmsrl(x86_pmu.perfctr + idx, pmc_count);

		prev_left = per_cpu(pmc_prev_left[idx], cpu);

		pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
			cpu, idx, pmc_ctrl);
		pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
		pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
			cpu, idx, prev_left);
	}
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
	}
	local_irq_restore(flags);
}

static void x86_pmu_stop(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (!__test_and_clear_bit(idx, cpuc->active_mask))
		return;

	x86_pmu.disable(event);

	/*
	 * Drain the remaining delta count out of an event
	 * that we are disabling:
	 */
	x86_perf_event_update(event);

	cpuc->events[idx] = NULL;
}

static void x86_pmu_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int i;

	x86_pmu_stop(event);

	for (i = 0; i < cpuc->n_events; i++) {
		if (event == cpuc->event_list[i]) {

			if (x86_pmu.put_event_constraints)
				x86_pmu.put_event_constraints(cpuc, event);

			while (++i < cpuc->n_events)
				cpuc->event_list[i-1] = cpuc->event_list[i];

			--cpuc->n_events;
			break;
		}
	}
	perf_event_update_userpage(event);
}

static int x86_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int idx, handled = 0;
	u64 val;

	perf_sample_data_init(&data, 0);

	cpuc = &__get_cpu_var(cpu_hw_events);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		if (!test_bit(idx, cpuc->active_mask))
			continue;

		event = cpuc->events[idx];
		hwc = &event->hw;

		val = x86_perf_event_update(event);
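		/*
		 * Counters count up from a negative start value, so a
		 * still-set top bit (bit cntval_bits - 1) means the counter
		 * has not overflowed yet and can be skipped.
		 */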
		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
			continue;

		/*
		 * event overflow
		 */
		handled = 1;
		data.period = event->hw.last_period;

		if (!x86_perf_event_set_period(event))
			continue;

		if (perf_event_overflow(event, 1, &data, regs))
			x86_pmu_stop(event);
	}

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	return handled;
}

void smp_perf_pending_interrupt(struct pt_regs *regs)
{
	irq_enter();
	ack_APIC_irq();
	inc_irq_stat(apic_pending_irqs);
	perf_event_do_pending();
	irq_exit();
}

void set_perf_event_pending(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
	if (!x86_pmu.apic || !x86_pmu_initialized())
		return;

	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
#endif
}

void perf_events_lapic_init(void)
{
	if (!x86_pmu.apic || !x86_pmu_initialized())
		return;

	/*
	 * Always use NMI for PMU
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

static int __kprobes
perf_event_nmi_handler(struct notifier_block *self,
			 unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct pt_regs *regs;

	if (!atomic_read(&active_events))
		return NOTIFY_DONE;

	switch (cmd) {
	case DIE_NMI:
	case DIE_NMI_IPI:
		break;

	default:
		return NOTIFY_DONE;
	}

	regs = args->regs;

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	/*
	 * Can't rely on the handled return value to say it was our NMI, two
	 * events could trigger 'simultaneously' raising two back-to-back NMIs.
	 *
	 * If the first NMI handles both, the latter will be empty and daze
	 * the CPU.
	 */
	x86_pmu.handle_irq(regs);

	return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_event_nmi_notifier = {
	.notifier_call	= perf_event_nmi_handler,
	.next		= NULL,
	.priority	= 1
};

static struct event_constraint unconstrained;
static struct event_constraint emptyconstraint;

static struct event_constraint *
x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct event_constraint *c;

	if (x86_pmu.event_constraints) {
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &unconstrained;
}

static int x86_event_sched_in(struct perf_event *event,
			  struct perf_cpu_context *cpuctx)
{
	int ret = 0;

	event->state = PERF_EVENT_STATE_ACTIVE;
	event->oncpu = smp_processor_id();
	event->tstamp_running += event->ctx->time - event->tstamp_stopped;

	if (!is_x86_event(event))
		ret = event->pmu->enable(event);

	if (!ret && !is_software_event(event))
		cpuctx->active_oncpu++;

	if (!ret && event->attr.exclusive)
		cpuctx->exclusive = 1;

	return ret;
}

static void x86_event_sched_out(struct perf_event *event,
			    struct perf_cpu_context *cpuctx)
{
	event->state = PERF_EVENT_STATE_INACTIVE;
	event->oncpu = -1;

	if (!is_x86_event(event))
		event->pmu->disable(event);

	event->tstamp_running -= event->ctx->time - event->tstamp_stopped;

	if (!is_software_event(event))
		cpuctx->active_oncpu--;

	if (event->attr.exclusive || !cpuctx->active_oncpu)
		cpuctx->exclusive = 0;
}

/*
 * Called to enable a whole group of events.
 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
 * Assumes the caller has disabled interrupts and has
 * frozen the PMU with hw_perf_save_disable.
 *
 * called with PMU disabled. If successful and return value 1,
 * then guaranteed to call perf_enable() and hw_perf_enable()
 */
int hw_perf_group_sched_in(struct perf_event *leader,
	       struct perf_cpu_context *cpuctx,
	       struct perf_event_context *ctx)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct perf_event *sub;
	int assign[X86_PMC_IDX_MAX];
	int n0, n1, ret;

	if (!x86_pmu_initialized())
		return 0;

	/* n0 = total number of events */
	n0 = collect_events(cpuc, leader, true);
	if (n0 < 0)
		return n0;

	ret = x86_pmu.schedule_events(cpuc, n0, assign);
	if (ret)
		return ret;

	ret = x86_event_sched_in(leader, cpuctx);
	if (ret)
		return ret;

	n1 = 1;
	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
		if (sub->state > PERF_EVENT_STATE_OFF) {
			ret = x86_event_sched_in(sub, cpuctx);
			if (ret)
				goto undo;
			++n1;
		}
	}
	/*
	 * copy new assignment, now we know it is possible
	 * will be used by hw_perf_enable()
	 */
	memcpy(cpuc->assign, assign, n0*sizeof(int));

	cpuc->n_events = n0;
	cpuc->n_added += n1;
	ctx->nr_active += n1;

	/*
	 * 1 means successful and events are active
	 * This is not quite true because we defer
	 * actual activation until hw_perf_enable() but
	 * this way we ensure the caller won't try to enable
	 * individual events
	 */
	return 1;
undo:
	x86_event_sched_out(leader, cpuctx);
	n0 = 1;
	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
		if (sub->state == PERF_EVENT_STATE_ACTIVE) {
			x86_event_sched_out(sub, cpuctx);
			if (++n0 == n1)
				break;
		}
	}
	return ret;
}

#include "perf_event_amd.c"
#include "perf_event_p6.c"
#include "perf_event_p4.c"
#include "perf_event_intel_lbr.c"
#include "perf_event_intel_ds.c"
#include "perf_event_intel.c"

static int __cpuinit
x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
	unsigned int cpu = (long)hcpu;
	int ret = NOTIFY_OK;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		if (x86_pmu.cpu_prepare)
			ret = x86_pmu.cpu_prepare(cpu);
		break;

	case CPU_STARTING:
		if (x86_pmu.cpu_starting)
			x86_pmu.cpu_starting(cpu);
		break;

	case CPU_DYING:
		if (x86_pmu.cpu_dying)
			x86_pmu.cpu_dying(cpu);
		break;

	case CPU_UP_CANCELED:
	case CPU_DEAD:
		if (x86_pmu.cpu_dead)
			x86_pmu.cpu_dead(cpu);
		break;

	default:
		break;
	}

	return ret;
}

static void __init pmu_check_apic(void)
{
	if (cpu_has_apic)
		return;

	x86_pmu.apic = 0;
	pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
	pr_info("no hardware sampling interrupt available.\n");
}

void __init init_hw_perf_events(void)
{
	struct event_constraint *c;
	int err;

	pr_info("Performance Events: ");

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		err = intel_pmu_init();
		break;
	case X86_VENDOR_AMD:
		err = amd_pmu_init();
		break;
	default:
		return;
	}
	if (err != 0) {
		pr_cont("no PMU driver, software events only.\n");
		return;
	}

	pmu_check_apic();

	pr_cont("%s PMU driver.\n", x86_pmu.name);

	if (x86_pmu.quirks)
		x86_pmu.quirks();

	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
	}
	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
	perf_max_events = x86_pmu.num_counters;

	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
	}

	x86_pmu.intel_ctrl |=
		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;

	perf_events_lapic_init();
	register_die_notifier(&perf_event_nmi_notifier);

	unconstrained = (struct event_constraint)
		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
				   0, x86_pmu.num_counters);

	if (x86_pmu.event_constraints) {
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			if (c->cmask != X86_RAW_EVENT_MASK)
				continue;

			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
			c->weight += x86_pmu.num_counters;
		}
	}

	pr_info("... version: %d\n", x86_pmu.version);
	pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
	pr_info("... generic registers: %d\n", x86_pmu.num_counters);
	pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
	pr_info("... max period: %016Lx\n", x86_pmu.max_period);
	pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
	pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);

	perf_cpu_notifier(x86_pmu_notifier);
}

static inline void x86_pmu_read(struct perf_event *event)
{
	x86_perf_event_update(event);
}

static const struct pmu pmu = {
	.enable		= x86_pmu_enable,
	.disable	= x86_pmu_disable,
	.start		= x86_pmu_start,
	.stop		= x86_pmu_stop,
	.read		= x86_pmu_read,
	.unthrottle	= x86_pmu_unthrottle,
};

/*
 * validate that we can schedule this event
 */
static int validate_event(struct perf_event *event)
{
	struct cpu_hw_events *fake_cpuc;
	struct event_constraint *c;
	int ret = 0;

	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
	if (!fake_cpuc)
		return -ENOMEM;

	c = x86_pmu.get_event_constraints(fake_cpuc, event);

	if (!c || !c->weight)
		ret = -ENOSPC;

	if (x86_pmu.put_event_constraints)
		x86_pmu.put_event_constraints(fake_cpuc, event);

	kfree(fake_cpuc);

	return ret;
}

/*
 * validate a single event group
 *
 * validation includes:
 *	- check events are compatible with each other
 *	- events do not compete for the same counter
 *	- number of events <= number of counters
 *
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int validate_group(struct perf_event *event)
{
	struct perf_event *leader = event->group_leader;
	struct cpu_hw_events *fake_cpuc;
	int ret, n;

	ret = -ENOMEM;
	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
	if (!fake_cpuc)
		goto out;

	/*
	 * the event is not yet connected with its
	 * siblings therefore we must first collect
	 * existing siblings, then add the new event
	 * before we can simulate the scheduling
	 */
	ret = -ENOSPC;
	n = collect_events(fake_cpuc, leader, true);
	if (n < 0)
		goto out_free;

	fake_cpuc->n_events = n;
	n = collect_events(fake_cpuc, event, false);
	if (n < 0)
		goto out_free;

	fake_cpuc->n_events = n;

	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);

out_free:
	kfree(fake_cpuc);
out:
	return ret;
}

const struct pmu *hw_perf_event_init(struct perf_event *event)
{
	const struct pmu *tmp;
	int err;

	err = __hw_perf_event_init(event);
	if (!err) {
		/*
		 * we temporarily connect event to its pmu
		 * such that validate_group() can classify
		 * it as an x86 event using is_x86_event()
		 */
		tmp = event->pmu;
		event->pmu = &pmu;

		if (event->group_leader != event)
			err = validate_group(event);
		else
			err = validate_event(event);

		event->pmu = tmp;
	}
	if (err) {
		if (event->destroy)
			event->destroy(event);
		return ERR_PTR(err);
	}

	return &pmu;
}

/*
 * callchain support
 */

static inline
void callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
	if (entry->nr < PERF_MAX_STACK_DEPTH)
		entry->ip[entry->nr++] = ip;
}

static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);


static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	/* Ignore warnings */
}

static void backtrace_warning(void *data, char *msg)
{
	/* Ignore warnings */
}

static int backtrace_stack(void *data, char *name)
{
	return 0;
}

static void backtrace_address(void *data, unsigned long addr, int reliable)
{
	struct perf_callchain_entry *entry = data;

	if (reliable)
		callchain_store(entry, addr);
}

static const struct stacktrace_ops backtrace_ops = {
	.warning		= backtrace_warning,
	.warning_symbol		= backtrace_warning_symbol,
	.stack			= backtrace_stack,
	.address		= backtrace_address,
	.walk_stack		= print_context_stack_bp,
};

#include "../dumpstack.h"

static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	callchain_store(entry, PERF_CONTEXT_KERNEL);
	callchain_store(entry, regs->ip);

	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
}

#ifdef CONFIG_COMPAT
static inline int
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	/* 32-bit process in 64-bit kernel. */
	struct stack_frame_ia32 frame;
	const void __user *fp;

	if (!test_thread_flag(TIF_IA32))
		return 0;

	fp = compat_ptr(regs->bp);
	while (entry->nr < PERF_MAX_STACK_DEPTH) {
		unsigned long bytes;
		frame.next_frame = 0;
		frame.return_address = 0;

		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
		if (bytes != sizeof(frame))
			break;

		if (fp < compat_ptr(regs->sp))
			break;

		callchain_store(entry, frame.return_address);
		fp = compat_ptr(frame.next_frame);
	}
	return 1;
}
#else
static inline int
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	return 0;
}
#endif

static void
perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	struct stack_frame frame;
	const void __user *fp;

	if (!user_mode(regs))
		regs = task_pt_regs(current);

	fp = (void __user *)regs->bp;

	callchain_store(entry, PERF_CONTEXT_USER);
	callchain_store(entry, regs->ip);

	if (perf_callchain_user32(regs, entry))
		return;

	while (entry->nr < PERF_MAX_STACK_DEPTH) {
		unsigned long bytes;
		frame.next_frame = NULL;
		frame.return_address = 0;

		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
		if (bytes != sizeof(frame))
			break;

		if ((unsigned long)fp < regs->sp)
			break;

		callchain_store(entry, frame.return_address);
		fp = frame.next_frame;
	}
}

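/*
 * The user-space walks above rely on the classic frame-pointer chain: each
 * frame is assumed to start with a saved frame pointer followed by the
 * return address (the next_frame/return_address pairs read via
 * copy_from_user_nmi()).  Binaries built without frame pointers will
 * terminate the walk early, truncating the callchain.
 */
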
static void
perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	int is_user;

	if (!regs)
		return;

	is_user = user_mode(regs);

	if (is_user && current->state != TASK_RUNNING)
		return;

	if (!is_user)
		perf_callchain_kernel(regs, entry);

	if (current->mm)
		perf_callchain_user(regs, entry);
}

struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
	struct perf_callchain_entry *entry;

	if (in_nmi())
		entry = &__get_cpu_var(pmc_nmi_entry);
	else
		entry = &__get_cpu_var(pmc_irq_entry);

	entry->nr = 0;

	perf_do_callchain(regs, entry);

	return entry;
}

void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
{
	regs->ip = ip;
	/*
	 * perf_arch_fetch_caller_regs adds another call, we need to increment
	 * the skip level
	 */
	regs->bp = rewind_frame_pointer(skip + 1);
	regs->cs = __KERNEL_CS;
	local_save_flags(regs->flags);
}