/*
 * Performance counter x86 architecture code
 *
 *  Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
 *  Copyright(C) 2009 Jaswinder Singh Rajput
 *
 *  For licencing details see kernel-base/COPYING
 */

#include <linux/perf_counter.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/sched.h>

#include <asm/perf_counter.h>
#include <asm/apic.h>

static bool perf_counters_initialized __read_mostly;

/*
 * Number of (generic) HW counters:
 */
static int nr_counters_generic __read_mostly;
static u64 perf_counter_mask __read_mostly;
static u64 counter_value_mask __read_mostly;

static int nr_counters_fixed __read_mostly;

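/*
 * Per-CPU bookkeeping: which counters are in use, how many PMIs have
 * been taken since the last unthrottling, and (on Intel) the saved
 * MSR_CORE_PERF_GLOBAL_CTRL value in 'global_enable'.
 */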
struct cpu_hw_counters {
	struct perf_counter	*counters[X86_PMC_IDX_MAX];
	unsigned long		used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long		interrupts;
	u64			global_enable;
};

/*
 * struct pmc_x86_ops - performance counter x86 ops
 */
struct pmc_x86_ops {
	u64		(*save_disable_all)(void);
	void		(*restore_all)(u64 ctrl);
	unsigned	eventsel;
	unsigned	perfctr;
	int		(*event_map)(int event);
	int		max_events;
};

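/* set to the Intel or AMD implementation by init_hw_perf_counters() */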
static struct pmc_x86_ops *pmc_ops;

static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);

/*
 * Intel PerfMon v3. Used on Core2 and later.
 */
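/* values are (unit-mask << 8) | event-select, as written into EVENTSEL */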
static const int intel_perfmon_event_map[] =
{
  [PERF_COUNT_CPU_CYCLES]		= 0x003c,
  [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_CACHE_REFERENCES]		= 0x4f2e,
  [PERF_COUNT_CACHE_MISSES]		= 0x412e,
  [PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
  [PERF_COUNT_BUS_CYCLES]		= 0x013c,
};

static int pmc_intel_event_map(int event)
{
	return intel_perfmon_event_map[event];
}

/*
 * AMD Performance Monitor K7 and later.
 */
static const int amd_perfmon_event_map[] =
{
  [PERF_COUNT_CPU_CYCLES]		= 0x0076,
  [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_CACHE_REFERENCES]		= 0x0080,
  [PERF_COUNT_CACHE_MISSES]		= 0x0081,
  [PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
};

static int pmc_amd_event_map(int event)
{
	return amd_perfmon_event_map[event];
}

/*
 * Propagate counter elapsed time into the generic counter.
 * Can only be executed on the CPU where the counter is active.
 * The delta of events processed is added to the generic counter.
 */
static void
x86_perf_counter_update(struct perf_counter *counter,
			struct hw_perf_counter *hwc, int idx)
{
	u64 prev_raw_count, new_raw_count, delta;

	/*
	 * Careful: an NMI might modify the previous counter value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic counter atomically:
	 */
again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	rdmsrl(hwc->counter_base + idx, new_raw_count);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
					new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (counter-)time and add that to the generic counter.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count, so we do that by clipping the delta to 32 bits:
	 */
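	/*
	 * Example of the wrap-safe arithmetic: prev_raw_count == 0xffffffff
	 * and new_raw_count == 0x00000003 yields (s32)3 - (s32)-1 == 4,
	 * i.e. four events counted across the 32-bit wrap.
	 */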
	delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);

	atomic64_add(delta, &counter->count);
	atomic64_sub(delta, &hwc->period_left);
}

/*
 * Setup the hardware configuration for a given hw_event_type
 */
static int __hw_perf_counter_init(struct perf_counter *counter)
{
	struct perf_counter_hw_event *hw_event = &counter->hw_event;
	struct hw_perf_counter *hwc = &counter->hw;

	if (unlikely(!perf_counters_initialized))
		return -EINVAL;

	/*
	 * Generate PMC IRQs:
	 * (keep 'enabled' bit clear for now)
	 */
	hwc->config = ARCH_PERFMON_EVENTSEL_INT;

	/*
	 * Count user and OS events unless requested not to.
	 */
	if (!hw_event->exclude_user)
		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
	if (!hw_event->exclude_kernel)
		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;

	/*
	 * If privileged enough, allow NMI events:
	 */
	hwc->nmi = 0;
	if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
		hwc->nmi = 1;

	hwc->irq_period = hw_event->irq_period;
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic counter period:
	 */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
			hwc->irq_period = 0x7FFFFFFF;

	atomic64_set(&hwc->period_left, hwc->irq_period);

	/*
	 * Raw event type provides the config in the event structure
	 */
	if (hw_event->raw) {
		hwc->config |= hw_event->type;
	} else {
		if (hw_event->type >= pmc_ops->max_events)
			return -EINVAL;
		/*
		 * The generic map:
		 */
		hwc->config |= pmc_ops->event_map(hw_event->type);
	}
	counter->wakeup_pending = 0;

	return 0;
}

static u64 pmc_intel_save_disable_all(void)
{
	u64 ctrl;

	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	return ctrl;
}

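/*
 * AMD CPUs have no global control MSR, so the enable bit of each
 * EVNTSEL is saved into the returned mask and cleared individually:
 */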
static u64 pmc_amd_save_disable_all(void)
{
	int idx;
	u64 val, ctrl = 0;

	for (idx = 0; idx < nr_counters_generic; idx++) {
		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
			ctrl |= (1 << idx);
		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
	}

	return ctrl;
}

u64 hw_perf_save_disable(void)
{
	if (unlikely(!perf_counters_initialized))
		return 0;

	return pmc_ops->save_disable_all();
}
EXPORT_SYMBOL_GPL(hw_perf_save_disable);

static void pmc_intel_restore_all(u64 ctrl)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
}

static void pmc_amd_restore_all(u64 ctrl)
{
	u64 val;
	int idx;

	for (idx = 0; idx < nr_counters_generic; idx++) {
		if (ctrl & (1 << idx)) {
			rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
			val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
			wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
		}
	}
}

void hw_perf_restore(u64 ctrl)
{
	if (unlikely(!perf_counters_initialized))
		return;

	pmc_ops->restore_all(ctrl);
}
EXPORT_SYMBOL_GPL(hw_perf_restore);

static inline void
__pmc_fixed_disable(struct perf_counter *counter,
		    struct hw_perf_counter *hwc, unsigned int __idx)
{
	int idx = __idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, mask;
	int err;

	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

static inline void
__pmc_generic_disable(struct perf_counter *counter,
		      struct hw_perf_counter *hwc, unsigned int idx)
{
	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
		__pmc_fixed_disable(counter, hwc, idx);
	else
		wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
}

static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);

/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the counter disabled in hw:
 */
static void
__hw_perf_counter_set_period(struct perf_counter *counter,
			     struct hw_perf_counter *hwc, int idx)
{
	s64 left = atomic64_read(&hwc->period_left);
	s32 period = hwc->irq_period;
	int err;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
	}

	per_cpu(prev_left[idx], smp_processor_id()) = left;

	/*
	 * The hw counter starts counting from this counter offset,
	 * mark it to be able to extract future deltas:
	 */
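	/*
	 * Programming -left into the counter makes the hardware overflow
	 * (and raise a PMI) after another 'left' events.
	 */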
	atomic64_set(&hwc->prev_count, (u64)-left);

	err = checking_wrmsrl(hwc->counter_base + idx,
			     (u64)(-left) & counter_value_mask);
}

static inline void
__pmc_fixed_enable(struct perf_counter *counter,
		   struct hw_perf_counter *hwc, unsigned int __idx)
{
	int idx = __idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, bits, mask;
	int err;

	/*
	 * Enable IRQ generation (0x8),
	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
	 * if requested:
	 */
	bits = 0x8ULL;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
		bits |= 0x2;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
		bits |= 0x1;
	bits <<= (idx * 4);
	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	ctrl_val |= bits;
	err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

static void
__pmc_generic_enable(struct perf_counter *counter,
		     struct hw_perf_counter *hwc, int idx)
{
	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
		__pmc_fixed_enable(counter, hwc, idx);
	else
		wrmsr(hwc->config_base + idx,
		      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
}

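/*
 * Map an architectural event to one of the Intel fixed-purpose counters,
 * or return -1 if it has to go to a generic counter:
 */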
static int
fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
{
	unsigned int event;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return -1;

	if (unlikely(hwc->nmi))
		return -1;

	event = hwc->config & ARCH_PERFMON_EVENT_MASK;

	if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS)))
		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
	if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES)))
		return X86_PMC_IDX_FIXED_CPU_CYCLES;
	if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES)))
		return X86_PMC_IDX_FIXED_BUS_CYCLES;

	return -1;
}

/*
 * Find a PMC slot for the freshly enabled / scheduled in counter:
 */
static int pmc_generic_enable(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	struct hw_perf_counter *hwc = &counter->hw;
	int idx;

	idx = fixed_mode_idx(counter, hwc);
	if (idx >= 0) {
		/*
		 * Try to get the fixed counter, if that is already taken
		 * then try to get a generic counter:
		 */
		if (test_and_set_bit(idx, cpuc->used))
			goto try_generic;

		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		/*
		 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
		 */
		hwc->counter_base =
			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
		hwc->idx = idx;
	} else {
		idx = hwc->idx;
		/* Try to get the previous generic counter again */
		if (test_and_set_bit(idx, cpuc->used)) {
try_generic:
			idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
			if (idx == nr_counters_generic)
				return -EAGAIN;

			set_bit(idx, cpuc->used);
			hwc->idx = idx;
		}
		hwc->config_base  = pmc_ops->eventsel;
		hwc->counter_base = pmc_ops->perfctr;
	}

	perf_counters_lapic_init(hwc->nmi);

	__pmc_generic_disable(counter, hwc, idx);

	cpuc->counters[idx] = counter;
	/*
	 * Make it visible before enabling the hw:
	 */
	smp_wmb();

	__hw_perf_counter_set_period(counter, hwc, idx);
	__pmc_generic_enable(counter, hwc, idx);

	return 0;
}

void perf_counter_print_debug(void)
{
	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
	struct cpu_hw_counters *cpuc;
	int cpu, idx;

	if (!nr_counters_generic)
		return;

	local_irq_disable();

	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_counters, cpu);

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);

		pr_info("\n");
		pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
		pr_info("CPU#%d: status: %016llx\n", cpu, status);
		pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
		pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
	}
	pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);

	for (idx = 0; idx < nr_counters_generic; idx++) {
		rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
		rdmsrl(pmc_ops->perfctr + idx, pmc_count);

		prev_left = per_cpu(prev_left[idx], cpu);

		pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
			cpu, idx, pmc_ctrl);
		pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
		pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
			cpu, idx, prev_left);
	}
	for (idx = 0; idx < nr_counters_fixed; idx++) {
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
	}
	local_irq_enable();
}

static void pmc_generic_disable(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	struct hw_perf_counter *hwc = &counter->hw;
	unsigned int idx = hwc->idx;

	__pmc_generic_disable(counter, hwc, idx);

	clear_bit(idx, cpuc->used);
	cpuc->counters[idx] = NULL;
	/*
	 * Make sure the cleared pointer becomes visible before we
	 * (potentially) free the counter:
	 */
	smp_wmb();

	/*
	 * Drain the remaining delta count out of a counter
	 * that we are disabling:
	 */
	x86_perf_counter_update(counter, hwc, idx);
}

static void perf_store_irq_data(struct perf_counter *counter, u64 data)
{
	struct perf_data *irqdata = counter->irqdata;

	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
		irqdata->overrun++;
	} else {
		u64 *p = (u64 *) &irqdata->data[irqdata->len];

		*p = data;
		irqdata->len += sizeof(u64);
	}
}

/*
 * Save and restart an expired counter. Called by NMI contexts,
 * so it has to be careful about preempting normal counter ops:
 */
static void perf_save_and_restart(struct perf_counter *counter)
{
	struct hw_perf_counter *hwc = &counter->hw;
	int idx = hwc->idx;

	x86_perf_counter_update(counter, hwc, idx);
	__hw_perf_counter_set_period(counter, hwc, idx);

	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
		__pmc_generic_enable(counter, hwc, idx);
}

static void
perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
{
	struct perf_counter *counter, *group_leader = sibling->group_leader;

	/*
	 * Store sibling timestamps (if any):
	 */
	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {

		x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
		perf_store_irq_data(sibling, counter->hw_event.type);
		perf_store_irq_data(sibling, atomic64_read(&counter->count));
	}
}

/*
 * Maximum interrupt frequency of 100KHz per CPU
 */
#define PERFMON_MAX_INTERRUPTS (100000/HZ)
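/* cpuc->interrupts is checked against this and reset by perf_counter_unthrottle() */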

/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
{
	int bit, cpu = smp_processor_id();
	u64 ack, status;
	struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);

	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);

	/* Disable counters globally */
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
	ack_APIC_irq();

	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
	if (!status)
		goto out;

again:
	inc_irq_stat(apic_perf_irqs);
	ack = status;
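	/* the bits captured in 'ack' are the ones cleared via GLOBAL_OVF_CTRL below */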
	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
		struct perf_counter *counter = cpuc->counters[bit];

		clear_bit(bit, (unsigned long *) &status);
		if (!counter)
			continue;

		perf_save_and_restart(counter);

		switch (counter->hw_event.record_type) {
		case PERF_RECORD_SIMPLE:
			continue;
		case PERF_RECORD_IRQ:
			perf_store_irq_data(counter, instruction_pointer(regs));
			break;
		case PERF_RECORD_GROUP:
			perf_handle_group(counter, &status, &ack);
			break;
		}
		/*
		 * From NMI context we cannot call into the scheduler to
		 * do a task wakeup - but we mark these counters as
		 * wakeup_pending and initiate a wakeup callback:
		 */
		if (nmi) {
			counter->wakeup_pending = 1;
			set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
		} else {
			wake_up(&counter->waitq);
		}
	}

	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);

	/*
	 * Repeat if there is more work to be done:
	 */
	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
	if (status)
		goto again;
out:
	/*
	 * Restore - do not reenable when global enable is off or throttled:
	 */
	if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
}

void perf_counter_unthrottle(void)
{
	struct cpu_hw_counters *cpuc;
	u64 global_enable;

	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
		return;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return;

	if (unlikely(!perf_counters_initialized))
		return;

	cpuc = &per_cpu(cpu_hw_counters, smp_processor_id());
	if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
		if (printk_ratelimit())
			printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
	}
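	/*
	 * Re-enable globally if a throttled interrupt handler left
	 * MSR_CORE_PERF_GLOBAL_CTRL cleared:
	 */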
	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_enable);
	if (unlikely(cpuc->global_enable && !global_enable))
		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
	cpuc->interrupts = 0;
}

void smp_perf_counter_interrupt(struct pt_regs *regs)
{
	irq_enter();
	apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
	__smp_perf_counter_interrupt(regs, 0);

	irq_exit();
}

/*
 * This handler is triggered by NMI contexts:
 */
void perf_counter_notify(struct pt_regs *regs)
{
	struct cpu_hw_counters *cpuc;
	unsigned long flags;
	int bit, cpu;

	local_irq_save(flags);
	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_counters, cpu);

	for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) {
		struct perf_counter *counter = cpuc->counters[bit];

		if (!counter)
			continue;

		if (counter->wakeup_pending) {
			counter->wakeup_pending = 0;
			wake_up(&counter->waitq);
		}
	}

	local_irq_restore(flags);
}

void perf_counters_lapic_init(int nmi)
{
	u32 apic_val;

	if (!perf_counters_initialized)
		return;
	/*
	 * Enable the performance counter vector in the APIC LVT:
	 */
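	/* (mask the APIC error vector while LVTPC is being rewritten) */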
	apic_val = apic_read(APIC_LVTERR);

	apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
	if (nmi)
		apic_write(APIC_LVTPC, APIC_DM_NMI);
	else
		apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
	apic_write(APIC_LVTERR, apic_val);
}

static int __kprobes
perf_counter_nmi_handler(struct notifier_block *self,
			 unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct pt_regs *regs;

	if (likely(cmd != DIE_NMI_IPI))
		return NOTIFY_DONE;

	regs = args->regs;

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	__smp_perf_counter_interrupt(regs, 1);

	return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
	.notifier_call		= perf_counter_nmi_handler,
	.next			= NULL,
	.priority		= 1
};

static struct pmc_x86_ops pmc_intel_ops = {
	.save_disable_all	= pmc_intel_save_disable_all,
	.restore_all		= pmc_intel_restore_all,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= pmc_intel_event_map,
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
};

static struct pmc_x86_ops pmc_amd_ops = {
	.save_disable_all	= pmc_amd_save_disable_all,
	.restore_all		= pmc_amd_restore_all,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.event_map		= pmc_amd_event_map,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
};

static struct pmc_x86_ops *pmc_intel_init(void)
{
	union cpuid10_eax eax;
	unsigned int ebx;
	unsigned int unused;
	union cpuid10_edx edx;

	/*
	 * Check whether the Architectural PerfMon supports
	 * Branch Misses Retired Event or not.
	 */
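	/*
	 * CPUID leaf 0xA describes the architectural PMU: version,
	 * counter bit width, number of counters and the length of the
	 * event-availability mask.
	 */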
	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
		return NULL;

	pr_info("Intel Performance Monitoring support detected.\n");
	pr_info("... version: %d\n", eax.split.version_id);
	pr_info("... bit width: %d\n", eax.split.bit_width);
	pr_info("... mask length: %d\n", eax.split.mask_length);

	nr_counters_generic = eax.split.num_counters;
	nr_counters_fixed = edx.split.num_counters_fixed;
	counter_value_mask = (1ULL << eax.split.bit_width) - 1;

	return &pmc_intel_ops;
}

static struct pmc_x86_ops *pmc_amd_init(void)
{
	nr_counters_generic = 4;
	nr_counters_fixed = 0;

	pr_info("AMD Performance Monitoring support detected.\n");

	return &pmc_amd_ops;
}

void __init init_hw_perf_counters(void)
{
	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
		return;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		pmc_ops = pmc_intel_init();
		break;
	case X86_VENDOR_AMD:
		pmc_ops = pmc_amd_init();
		break;
	}
	if (!pmc_ops)
		return;

	pr_info("... num counters: %d\n", nr_counters_generic);
	if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
		nr_counters_generic = X86_PMC_MAX_GENERIC;
		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
			nr_counters_generic, X86_PMC_MAX_GENERIC);
	}
	perf_counter_mask = (1 << nr_counters_generic) - 1;
	perf_max_counters = nr_counters_generic;

	pr_info("... value mask: %016Lx\n", counter_value_mask);

	if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
		nr_counters_fixed = X86_PMC_MAX_FIXED;
		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
			nr_counters_fixed, X86_PMC_MAX_FIXED);
	}
	pr_info("... fixed counters: %d\n", nr_counters_fixed);

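	/*
	 * Generic counters occupy the low bits of the mask; fixed counters
	 * start at bit X86_PMC_IDX_FIXED:
	 */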
	perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;

	pr_info("... counter mask: %016Lx\n", perf_counter_mask);
	perf_counters_initialized = true;

	perf_counters_lapic_init(0);
	register_die_notifier(&perf_counter_nmi_notifier);
}

static void pmc_generic_read(struct perf_counter *counter)
{
	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
}

static const struct hw_perf_counter_ops x86_perf_counter_ops = {
	.enable		= pmc_generic_enable,
	.disable	= pmc_generic_disable,
	.read		= pmc_generic_read,
};

const struct hw_perf_counter_ops *
hw_perf_counter_init(struct perf_counter *counter)
{
	int err;

	err = __hw_perf_counter_init(counter);
	if (err)
		return NULL;

	return &x86_perf_counter_ops;
}