/*
 * Performance counter x86 architecture code
 *
 *  Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
 *  Copyright(C) 2009 Jaswinder Singh Rajput
 *
 *  For licencing details see kernel-base/COPYING
 */

#include <linux/perf_counter.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/sched.h>

#include <asm/perf_counter.h>
#include <asm/apic.h>

static bool perf_counters_initialized __read_mostly;

/*
 * Number of (generic) HW counters:
 */
static int nr_counters_generic __read_mostly;
static u64 perf_counter_mask __read_mostly;
static u64 counter_value_mask __read_mostly;

static int nr_counters_fixed __read_mostly;

struct cpu_hw_counters {
        struct perf_counter     *counters[X86_PMC_IDX_MAX];
        unsigned long           used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        unsigned long           interrupts;
        u64                     global_enable;
};

/*
 * struct pmc_x86_ops - performance counter x86 ops
 */
struct pmc_x86_ops {
        u64             (*save_disable_all)(void);
        void            (*restore_all)(u64 ctrl);
        unsigned        eventsel;
        unsigned        perfctr;
        int             (*event_map)(int event);
        int             max_events;
};

static struct pmc_x86_ops *pmc_ops;

static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);

/*
 * Intel PerfMon v3. Used on Core2 and later.
 */
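/*
 * The values are the standard architectural event-select encodings:
 * bits 0-7 are the event-select code and bits 8-15 the unit mask
 * (e.g. 0x412e is umask 0x41, event 0x2e: LLC misses).
 */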
static const int intel_perfmon_event_map[] =
{
        [PERF_COUNT_CPU_CYCLES]                 = 0x003c,
        [PERF_COUNT_INSTRUCTIONS]               = 0x00c0,
        [PERF_COUNT_CACHE_REFERENCES]           = 0x4f2e,
        [PERF_COUNT_CACHE_MISSES]               = 0x412e,
        [PERF_COUNT_BRANCH_INSTRUCTIONS]        = 0x00c4,
        [PERF_COUNT_BRANCH_MISSES]              = 0x00c5,
        [PERF_COUNT_BUS_CYCLES]                 = 0x013c,
};

static int pmc_intel_event_map(int event)
{
        return intel_perfmon_event_map[event];
}

/*
 * Propagate counter elapsed time into the generic counter.
 * Can only be executed on the CPU where the counter is active.
 * Returns the delta events processed.
 */
static void
x86_perf_counter_update(struct perf_counter *counter,
                        struct hw_perf_counter *hwc, int idx)
{
        u64 prev_raw_count, new_raw_count, delta;

        /*
         * Careful: an NMI might modify the previous counter value.
         *
         * Our tactic to handle this is to first atomically read and
         * exchange a new raw count - then add that new-prev delta
         * count to the generic counter atomically:
         */
again:
        prev_raw_count = atomic64_read(&hwc->prev_count);
        rdmsrl(hwc->counter_base + idx, new_raw_count);

        if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;

        /*
         * Now we have the new raw value and have updated the prev
         * timestamp already. We can now calculate the elapsed delta
         * (counter-)time and add that to the generic counter.
         *
         * Careful, not all hw sign-extends above the physical width
         * of the count, so we do that by clipping the delta to 32 bits:
         */
        delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);

        atomic64_add(delta, &counter->count);
        atomic64_sub(delta, &hwc->period_left);
}

/*
 * Set up the hardware configuration for a given hw_event_type
 */
static int __hw_perf_counter_init(struct perf_counter *counter)
{
        struct perf_counter_hw_event *hw_event = &counter->hw_event;
        struct hw_perf_counter *hwc = &counter->hw;

        if (unlikely(!perf_counters_initialized))
                return -EINVAL;

        /*
         * Generate PMC IRQs:
         * (keep 'enabled' bit clear for now)
         */
        hwc->config = ARCH_PERFMON_EVENTSEL_INT;

        /*
         * Count user and OS events unless requested not to.
         */
        if (!hw_event->exclude_user)
                hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
        if (!hw_event->exclude_kernel)
                hwc->config |= ARCH_PERFMON_EVENTSEL_OS;

        /*
         * If privileged enough, allow NMI events:
         */
        hwc->nmi = 0;
        if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
                hwc->nmi = 1;

        hwc->irq_period = hw_event->irq_period;
        /*
         * Intel PMCs cannot be accessed sanely above 32 bit width,
         * so we install an artificial 1<<31 period regardless of
         * the generic counter period:
         */
        if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
                hwc->irq_period = 0x7FFFFFFF;

        atomic64_set(&hwc->period_left, hwc->irq_period);

        /*
         * Raw event type provides the config in the event structure
         */
        if (hw_event->raw) {
                hwc->config |= hw_event->type;
        } else {
                if (hw_event->type >= pmc_ops->max_events)
                        return -EINVAL;
                /*
                 * The generic map:
                 */
                hwc->config |= pmc_ops->event_map(hw_event->type);
        }
        counter->wakeup_pending = 0;

        return 0;
}

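/*
 * Disable all counters by clearing the global control MSR, and hand the
 * previous value back to the caller so it can be restored later:
 */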
static u64 pmc_intel_save_disable_all(void)
{
        u64 ctrl;

        rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

        return ctrl;
}

u64 hw_perf_save_disable(void)
{
        if (unlikely(!perf_counters_initialized))
                return 0;

        return pmc_ops->save_disable_all();
}
EXPORT_SYMBOL_GPL(hw_perf_save_disable);

static void pmc_intel_restore_all(u64 ctrl)
{
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
}

void hw_perf_restore(u64 ctrl)
{
        if (unlikely(!perf_counters_initialized))
                return;

        pmc_ops->restore_all(ctrl);
}
EXPORT_SYMBOL_GPL(hw_perf_restore);

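/*
 * Fixed-function counters are controlled through a single MSR
 * (MSR_ARCH_PERFMON_FIXED_CTR_CTRL), with a 4-bit control field per
 * counter; clearing this counter's field disables it:
 */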
static inline void
__pmc_fixed_disable(struct perf_counter *counter,
                    struct hw_perf_counter *hwc, unsigned int __idx)
{
        int idx = __idx - X86_PMC_IDX_FIXED;
        u64 ctrl_val, mask;
        int err;

        mask = 0xfULL << (idx * 4);

        rdmsrl(hwc->config_base, ctrl_val);
        ctrl_val &= ~mask;
        err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

static inline void
__pmc_generic_disable(struct perf_counter *counter,
                      struct hw_perf_counter *hwc, unsigned int idx)
{
        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
                __pmc_fixed_disable(counter, hwc, idx);
        else
                wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
}

static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);

/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the counter disabled in hw:
 */
static void
__hw_perf_counter_set_period(struct perf_counter *counter,
                             struct hw_perf_counter *hwc, int idx)
{
        s64 left = atomic64_read(&hwc->period_left);
        s32 period = hwc->irq_period;
        int err;

        /*
         * If we are way outside a reasonable range then just skip forward:
         */
        if (unlikely(left <= -period)) {
                left = period;
                atomic64_set(&hwc->period_left, left);
        }

        if (unlikely(left <= 0)) {
                left += period;
                atomic64_set(&hwc->period_left, left);
        }

        per_cpu(prev_left[idx], smp_processor_id()) = left;

        /*
         * The hw counter starts counting from this counter offset,
         * mark it to be able to extract future deltas:
         */
        atomic64_set(&hwc->prev_count, (u64)-left);

        err = checking_wrmsrl(hwc->counter_base + idx,
                              (u64)(-left) & counter_value_mask);
}

static inline void
__pmc_fixed_enable(struct perf_counter *counter,
                   struct hw_perf_counter *hwc, unsigned int __idx)
{
        int idx = __idx - X86_PMC_IDX_FIXED;
        u64 ctrl_val, bits, mask;
        int err;

        /*
         * Enable IRQ generation (0x8),
         * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
         * if requested:
         */
        bits = 0x8ULL;
        if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
                bits |= 0x2;
        if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
                bits |= 0x1;
        bits <<= (idx * 4);
        mask = 0xfULL << (idx * 4);

        rdmsrl(hwc->config_base, ctrl_val);
        ctrl_val &= ~mask;
        ctrl_val |= bits;
        err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

static void
__pmc_generic_enable(struct perf_counter *counter,
                     struct hw_perf_counter *hwc, int idx)
{
        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
                __pmc_fixed_enable(counter, hwc, idx);
        else
                wrmsr(hwc->config_base + idx,
                      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
}

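/*
 * Map the counter's event onto one of the fixed-function counters,
 * or return -1 if it has to use a generic counter:
 */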
static int
fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
{
        unsigned int event;

        if (unlikely(hwc->nmi))
                return -1;

        event = hwc->config & ARCH_PERFMON_EVENT_MASK;

        if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS)))
                return X86_PMC_IDX_FIXED_INSTRUCTIONS;
        if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES)))
                return X86_PMC_IDX_FIXED_CPU_CYCLES;
        if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES)))
                return X86_PMC_IDX_FIXED_BUS_CYCLES;

        return -1;
}

/*
 * Find a PMC slot for the freshly enabled / scheduled in counter:
 */
static int pmc_generic_enable(struct perf_counter *counter)
{
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
        struct hw_perf_counter *hwc = &counter->hw;
        int idx;

        idx = fixed_mode_idx(counter, hwc);
        if (idx >= 0) {
                /*
                 * Try to get the fixed counter, if that is already taken
                 * then try to get a generic counter:
                 */
                if (test_and_set_bit(idx, cpuc->used))
                        goto try_generic;

                hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
                /*
                 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
                 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
                 */
                hwc->counter_base =
                        MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
                hwc->idx = idx;
        } else {
                idx = hwc->idx;
                /* Try to get the previous generic counter again */
                if (test_and_set_bit(idx, cpuc->used)) {
try_generic:
                        idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
                        if (idx == nr_counters_generic)
                                return -EAGAIN;

                        set_bit(idx, cpuc->used);
                        hwc->idx = idx;
                }
                hwc->config_base = pmc_ops->eventsel;
                hwc->counter_base = pmc_ops->perfctr;
        }

        perf_counters_lapic_init(hwc->nmi);

        __pmc_generic_disable(counter, hwc, idx);

        cpuc->counters[idx] = counter;
        /*
         * Make it visible before enabling the hw:
         */
        smp_wmb();

        __hw_perf_counter_set_period(counter, hwc, idx);
        __pmc_generic_enable(counter, hwc, idx);

        return 0;
}

void perf_counter_print_debug(void)
{
        u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
        struct cpu_hw_counters *cpuc;
        int cpu, idx;

        if (!nr_counters_generic)
                return;

        local_irq_disable();

        cpu = smp_processor_id();
        cpuc = &per_cpu(cpu_hw_counters, cpu);

        rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
        rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
        rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);

        printk(KERN_INFO "\n");
        printk(KERN_INFO "CPU#%d: ctrl: %016llx\n", cpu, ctrl);
        printk(KERN_INFO "CPU#%d: status: %016llx\n", cpu, status);
        printk(KERN_INFO "CPU#%d: overflow: %016llx\n", cpu, overflow);
        printk(KERN_INFO "CPU#%d: fixed: %016llx\n", cpu, fixed);
        printk(KERN_INFO "CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);

        for (idx = 0; idx < nr_counters_generic; idx++) {
                rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
                rdmsrl(pmc_ops->perfctr + idx, pmc_count);

                prev_left = per_cpu(prev_left[idx], cpu);

                printk(KERN_INFO "CPU#%d: gen-PMC%d ctrl: %016llx\n",
                        cpu, idx, pmc_ctrl);
                printk(KERN_INFO "CPU#%d: gen-PMC%d count: %016llx\n",
                        cpu, idx, pmc_count);
                printk(KERN_INFO "CPU#%d: gen-PMC%d left: %016llx\n",
                        cpu, idx, prev_left);
        }
        for (idx = 0; idx < nr_counters_fixed; idx++) {
                rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

                printk(KERN_INFO "CPU#%d: fixed-PMC%d count: %016llx\n",
                        cpu, idx, pmc_count);
        }
        local_irq_enable();
}

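/*
 * Disable the counter in hardware, release its PMC slot and fold the
 * remaining delta into the generic counter value:
 */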
static void pmc_generic_disable(struct perf_counter *counter)
{
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
        struct hw_perf_counter *hwc = &counter->hw;
        unsigned int idx = hwc->idx;

        __pmc_generic_disable(counter, hwc, idx);

        clear_bit(idx, cpuc->used);
        cpuc->counters[idx] = NULL;
        /*
         * Make sure the cleared pointer becomes visible before we
         * (potentially) free the counter:
         */
        smp_wmb();

        /*
         * Drain the remaining delta count out of a counter
         * that we are disabling:
         */
        x86_perf_counter_update(counter, hwc, idx);
}

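/*
 * Append one u64 of sample data to the counter's IRQ data buffer,
 * counting an overrun if the buffer is full:
 */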
static void perf_store_irq_data(struct perf_counter *counter, u64 data)
{
        struct perf_data *irqdata = counter->irqdata;

        if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
                irqdata->overrun++;
        } else {
                u64 *p = (u64 *) &irqdata->data[irqdata->len];

                *p = data;
                irqdata->len += sizeof(u64);
        }
}

/*
 * Save and restart an expired counter. Called by NMI contexts,
 * so it has to be careful about preempting normal counter ops:
 */
static void perf_save_and_restart(struct perf_counter *counter)
{
        struct hw_perf_counter *hwc = &counter->hw;
        int idx = hwc->idx;

        x86_perf_counter_update(counter, hwc, idx);
        __hw_perf_counter_set_period(counter, hwc, idx);

        if (counter->state == PERF_COUNTER_STATE_ACTIVE)
                __pmc_generic_enable(counter, hwc, idx);
}

static void
perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
{
        struct perf_counter *counter, *group_leader = sibling->group_leader;

        /*
         * Store sibling timestamps (if any):
         */
        list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {

                x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
                perf_store_irq_data(sibling, counter->hw_event.type);
                perf_store_irq_data(sibling, atomic64_read(&counter->count));
        }
}

/*
 * Maximum interrupt frequency of 100KHz per CPU
 */
#define PERFMON_MAX_INTERRUPTS 100000/HZ

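/*
 * A CPU that takes more than PERFMON_MAX_INTERRUPTS counter interrupts
 * between two calls to perf_counter_unthrottle() is throttled: its
 * counters stay globally disabled until the next unthrottle.
 */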
/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
{
        int bit, cpu = smp_processor_id();
        u64 ack, status;
        struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);

        rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);

        /* Disable counters globally */
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
        ack_APIC_irq();

        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
        if (!status)
                goto out;

again:
        inc_irq_stat(apic_perf_irqs);
        ack = status;
        for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
                struct perf_counter *counter = cpuc->counters[bit];

                clear_bit(bit, (unsigned long *) &status);
                if (!counter)
                        continue;

                perf_save_and_restart(counter);

                switch (counter->hw_event.record_type) {
                case PERF_RECORD_SIMPLE:
                        continue;
                case PERF_RECORD_IRQ:
                        perf_store_irq_data(counter, instruction_pointer(regs));
                        break;
                case PERF_RECORD_GROUP:
                        perf_handle_group(counter, &status, &ack);
                        break;
                }
                /*
                 * From NMI context we cannot call into the scheduler to
                 * do a task wakeup - but we mark these counters as
                 * wakeup_pending and initiate a wakeup callback:
                 */
                if (nmi) {
                        counter->wakeup_pending = 1;
                        set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
                } else {
                        wake_up(&counter->waitq);
                }
        }

        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);

        /*
         * Repeat if there is more work to be done:
         */
        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
        if (status)
                goto again;
out:
        /*
         * Restore - do not reenable when global enable is off or throttled:
         */
        if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
                wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
}

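/*
 * Undo throttling: if the per-CPU interrupt limit was hit, restore the
 * saved global-enable value, then reset the interrupt count:
 */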
void perf_counter_unthrottle(void)
{
        struct cpu_hw_counters *cpuc;
        u64 global_enable;

        if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                return;

        if (unlikely(!perf_counters_initialized))
                return;

        cpuc = &per_cpu(cpu_hw_counters, smp_processor_id());
        if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
                if (printk_ratelimit())
                        printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
                wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
        }
        rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_enable);
        if (unlikely(cpuc->global_enable && !global_enable))
                wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
        cpuc->interrupts = 0;
}

void smp_perf_counter_interrupt(struct pt_regs *regs)
{
        irq_enter();
        apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
        __smp_perf_counter_interrupt(regs, 0);

        irq_exit();
}

/*
 * This handler is triggered by NMI contexts:
 */
void perf_counter_notify(struct pt_regs *regs)
{
        struct cpu_hw_counters *cpuc;
        unsigned long flags;
        int bit, cpu;

        local_irq_save(flags);
        cpu = smp_processor_id();
        cpuc = &per_cpu(cpu_hw_counters, cpu);

        for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) {
                struct perf_counter *counter = cpuc->counters[bit];

                if (!counter)
                        continue;

                if (counter->wakeup_pending) {
                        counter->wakeup_pending = 0;
                        wake_up(&counter->waitq);
                }
        }

        local_irq_restore(flags);
}

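/*
 * Program the local APIC performance-counter LVT entry to deliver
 * either a normal interrupt or an NMI:
 */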
void perf_counters_lapic_init(int nmi)
{
        u32 apic_val;

        if (!perf_counters_initialized)
                return;
        /*
         * Enable the performance counter vector in the APIC LVT:
         */
        apic_val = apic_read(APIC_LVTERR);

        apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
        if (nmi)
                apic_write(APIC_LVTPC, APIC_DM_NMI);
        else
                apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
        apic_write(APIC_LVTERR, apic_val);
}

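/*
 * NMI die-notifier: treat a DIE_NMI_IPI as a potential counter overflow
 * and run the counter interrupt handler in NMI mode:
 */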
static int __kprobes
perf_counter_nmi_handler(struct notifier_block *self,
                         unsigned long cmd, void *__args)
{
        struct die_args *args = __args;
        struct pt_regs *regs;

        if (likely(cmd != DIE_NMI_IPI))
                return NOTIFY_DONE;

        regs = args->regs;

        apic_write(APIC_LVTPC, APIC_DM_NMI);
        __smp_perf_counter_interrupt(regs, 1);

        return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
        .notifier_call  = perf_counter_nmi_handler,
        .next           = NULL,
        .priority       = 1
};

static struct pmc_x86_ops pmc_intel_ops = {
        .save_disable_all       = pmc_intel_save_disable_all,
        .restore_all            = pmc_intel_restore_all,
        .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
        .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
        .event_map              = pmc_intel_event_map,
        .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
};

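/*
 * Probe CPUID leaf 0xa for architectural perfmon support and fill in
 * the global counter parameters; returns NULL if unsupported:
 */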
static struct pmc_x86_ops *pmc_intel_init(void)
{
        union cpuid10_eax eax;
        unsigned int ebx;
        unsigned int unused;
        union cpuid10_edx edx;

        /*
         * Check whether the Architectural PerfMon supports
         * Branch Misses Retired Event or not.
         */
        cpuid(10, &eax.full, &ebx, &unused, &edx.full);
        if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
                return NULL;

        printk(KERN_INFO "Intel Performance Monitoring support detected.\n");
        printk(KERN_INFO "... version: %d\n", eax.split.version_id);
        printk(KERN_INFO "... bit width: %d\n", eax.split.bit_width);
        printk(KERN_INFO "... mask length: %d\n", eax.split.mask_length);

        nr_counters_generic = eax.split.num_counters;
        nr_counters_fixed = edx.split.num_counters_fixed;
        counter_value_mask = (1ULL << eax.split.bit_width) - 1;

        return &pmc_intel_ops;
}

void __init init_hw_perf_counters(void)
{
        if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                return;

        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_INTEL:
                pmc_ops = pmc_intel_init();
                break;
        }
        if (!pmc_ops)
                return;

        printk(KERN_INFO "... num counters: %d\n", nr_counters_generic);
        if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
                nr_counters_generic = X86_PMC_MAX_GENERIC;
                WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
                        nr_counters_generic, X86_PMC_MAX_GENERIC);
        }
        perf_counter_mask = (1 << nr_counters_generic) - 1;
        perf_max_counters = nr_counters_generic;

        printk(KERN_INFO "... value mask: %016Lx\n", counter_value_mask);

        if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
                nr_counters_fixed = X86_PMC_MAX_FIXED;
                WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
                        nr_counters_fixed, X86_PMC_MAX_FIXED);
        }
        printk(KERN_INFO "... fixed counters: %d\n", nr_counters_fixed);

        perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;

        printk(KERN_INFO "... counter mask: %016Lx\n", perf_counter_mask);
        perf_counters_initialized = true;

        perf_counters_lapic_init(0);
        register_die_notifier(&perf_counter_nmi_notifier);
}

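/*
 * ->read() method: fold the current hardware count into the generic
 * counter value:
 */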
static void pmc_generic_read(struct perf_counter *counter)
{
        x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
}

static const struct hw_perf_counter_ops x86_perf_counter_ops = {
        .enable         = pmc_generic_enable,
        .disable        = pmc_generic_disable,
        .read           = pmc_generic_read,
};

const struct hw_perf_counter_ops *
hw_perf_counter_init(struct perf_counter *counter)
{
        int err;

        err = __hw_perf_counter_init(counter);
        if (err)
                return NULL;

        return &x86_perf_counter_ops;
}