]>
Commit | Line | Data |
---|---|---|
b7169166 RR |
1 | /* |
2 | * Performance events - AMD IBS | |
3 | * | |
4 | * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter | |
5 | * | |
6 | * For licencing details see kernel-base/COPYING | |
7 | */ | |
8 | ||
9 | #include <linux/perf_event.h> | |
10 | #include <linux/module.h> | |
11 | #include <linux/pci.h> | |
d47e8238 | 12 | #include <linux/ptrace.h> |
b7169166 RR |
13 | |
14 | #include <asm/apic.h> | |
15 | ||
16 | static u32 ibs_caps; | |
17 | ||
18 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) | |
19 | ||
b7074f1f RR |
20 | #include <linux/kprobes.h> |
21 | #include <linux/hardirq.h> | |
22 | ||
23 | #include <asm/nmi.h> | |
24 | ||
51041943 RR |
25 | #define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) |
26 | #define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT | |
27 | ||
4db2e8e6 RR |
/* Per-cpu IBS pmu state bits, kept in cpu_perf_ibs::state. */
enum ibs_states {
	IBS_ENABLED	= 0,	/* event added on this cpu */
	IBS_STARTED	= 1,	/* hardware sampling is running */
	IBS_STOPPING	= 2,	/* stopped; a late NMI may still arrive */

	IBS_MAX_STATES,
};
35 | ||
36 | struct cpu_perf_ibs { | |
37 | struct perf_event *event; | |
38 | unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)]; | |
39 | }; | |
40 | ||
51041943 RR |
41 | struct perf_ibs { |
42 | struct pmu pmu; | |
43 | unsigned int msr; | |
44 | u64 config_mask; | |
45 | u64 cnt_mask; | |
46 | u64 enable_mask; | |
b7074f1f | 47 | u64 valid_mask; |
db98c5fa | 48 | u64 max_period; |
b7074f1f RR |
49 | unsigned long offset_mask[1]; |
50 | int offset_max; | |
4db2e8e6 | 51 | struct cpu_perf_ibs __percpu *pcpu; |
db98c5fa | 52 | u64 (*get_count)(u64 config); |
b7074f1f RR |
53 | }; |
54 | ||
55 | struct perf_ibs_data { | |
56 | u32 size; | |
57 | union { | |
58 | u32 data[0]; /* data buffer starts here */ | |
59 | u32 caps; | |
60 | }; | |
61 | u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; | |
51041943 RR |
62 | }; |
63 | ||
db98c5fa RR |
64 | static int |
65 | perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *count) | |
66 | { | |
67 | s64 left = local64_read(&hwc->period_left); | |
68 | s64 period = hwc->sample_period; | |
69 | int overflow = 0; | |
70 | ||
71 | /* | |
72 | * If we are way outside a reasonable range then just skip forward: | |
73 | */ | |
74 | if (unlikely(left <= -period)) { | |
75 | left = period; | |
76 | local64_set(&hwc->period_left, left); | |
77 | hwc->last_period = period; | |
78 | overflow = 1; | |
79 | } | |
80 | ||
81 | if (unlikely(left <= 0)) { | |
82 | left += period; | |
83 | local64_set(&hwc->period_left, left); | |
84 | hwc->last_period = period; | |
85 | overflow = 1; | |
86 | } | |
87 | ||
88 | if (unlikely(left < min)) | |
89 | left = min; | |
90 | ||
91 | if (left > max) | |
92 | left = max; | |
93 | ||
94 | *count = (u64)left; | |
95 | ||
96 | return overflow; | |
97 | } | |
98 | ||
99 | static int | |
100 | perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width) | |
101 | { | |
102 | struct hw_perf_event *hwc = &event->hw; | |
103 | int shift = 64 - width; | |
104 | u64 prev_raw_count; | |
105 | u64 delta; | |
106 | ||
107 | /* | |
108 | * Careful: an NMI might modify the previous event value. | |
109 | * | |
110 | * Our tactic to handle this is to first atomically read and | |
111 | * exchange a new raw count - then add that new-prev delta | |
112 | * count to the generic event atomically: | |
113 | */ | |
114 | prev_raw_count = local64_read(&hwc->prev_count); | |
115 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | |
116 | new_raw_count) != prev_raw_count) | |
117 | return 0; | |
118 | ||
119 | /* | |
120 | * Now we have the new raw value and have updated the prev | |
121 | * timestamp already. We can now calculate the elapsed delta | |
122 | * (event-)time and add that to the generic event. | |
123 | * | |
124 | * Careful, not all hw sign-extends above the physical width | |
125 | * of the count. | |
126 | */ | |
127 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | |
128 | delta >>= shift; | |
129 | ||
130 | local64_add(delta, &event->count); | |
131 | local64_sub(delta, &hwc->period_left); | |
132 | ||
133 | return 1; | |
134 | } | |
135 | ||
51041943 RR |
136 | static struct perf_ibs perf_ibs_fetch; |
137 | static struct perf_ibs perf_ibs_op; | |
138 | ||
139 | static struct perf_ibs *get_ibs_pmu(int type) | |
140 | { | |
141 | if (perf_ibs_fetch.pmu.type == type) | |
142 | return &perf_ibs_fetch; | |
143 | if (perf_ibs_op.pmu.type == type) | |
144 | return &perf_ibs_op; | |
145 | return NULL; | |
146 | } | |
b7169166 RR |
147 | |
148 | static int perf_ibs_init(struct perf_event *event) | |
149 | { | |
51041943 RR |
150 | struct hw_perf_event *hwc = &event->hw; |
151 | struct perf_ibs *perf_ibs; | |
152 | u64 max_cnt, config; | |
153 | ||
154 | perf_ibs = get_ibs_pmu(event->attr.type); | |
155 | if (!perf_ibs) | |
b7169166 | 156 | return -ENOENT; |
51041943 RR |
157 | |
158 | config = event->attr.config; | |
159 | if (config & ~perf_ibs->config_mask) | |
160 | return -EINVAL; | |
161 | ||
162 | if (hwc->sample_period) { | |
163 | if (config & perf_ibs->cnt_mask) | |
164 | /* raw max_cnt may not be set */ | |
165 | return -EINVAL; | |
6accb9cf RR |
166 | if (!event->attr.sample_freq && hwc->sample_period & 0x0f) |
167 | /* | |
168 | * lower 4 bits can not be set in ibs max cnt, | |
169 | * but allowing it in case we adjust the | |
170 | * sample period to set a frequency. | |
171 | */ | |
51041943 | 172 | return -EINVAL; |
6accb9cf RR |
173 | hwc->sample_period &= ~0x0FULL; |
174 | if (!hwc->sample_period) | |
175 | hwc->sample_period = 0x10; | |
51041943 RR |
176 | } else { |
177 | max_cnt = config & perf_ibs->cnt_mask; | |
db98c5fa | 178 | config &= ~perf_ibs->cnt_mask; |
51041943 RR |
179 | event->attr.sample_period = max_cnt << 4; |
180 | hwc->sample_period = event->attr.sample_period; | |
181 | } | |
182 | ||
db98c5fa | 183 | if (!hwc->sample_period) |
51041943 RR |
184 | return -EINVAL; |
185 | ||
6accb9cf RR |
186 | /* |
187 | * If we modify hwc->sample_period, we also need to update | |
188 | * hwc->last_period and hwc->period_left. | |
189 | */ | |
190 | hwc->last_period = hwc->sample_period; | |
191 | local64_set(&hwc->period_left, hwc->sample_period); | |
192 | ||
51041943 RR |
193 | hwc->config_base = perf_ibs->msr; |
194 | hwc->config = config; | |
195 | ||
b7169166 RR |
196 | return 0; |
197 | } | |
198 | ||
db98c5fa RR |
199 | static int perf_ibs_set_period(struct perf_ibs *perf_ibs, |
200 | struct hw_perf_event *hwc, u64 *period) | |
201 | { | |
202 | int ret; | |
203 | ||
204 | /* ignore lower 4 bits in min count: */ | |
205 | ret = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period); | |
206 | local64_set(&hwc->prev_count, 0); | |
207 | ||
208 | return ret; | |
209 | } | |
210 | ||
211 | static u64 get_ibs_fetch_count(u64 config) | |
212 | { | |
213 | return (config & IBS_FETCH_CNT) >> 12; | |
214 | } | |
215 | ||
216 | static u64 get_ibs_op_count(u64 config) | |
217 | { | |
218 | return (config & IBS_OP_CUR_CNT) >> 32; | |
219 | } | |
220 | ||
221 | static void | |
222 | perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, | |
223 | u64 config) | |
224 | { | |
225 | u64 count = perf_ibs->get_count(config); | |
226 | ||
227 | while (!perf_event_try_update(event, count, 20)) { | |
228 | rdmsrl(event->hw.config_base, config); | |
229 | count = perf_ibs->get_count(config); | |
230 | } | |
231 | } | |
232 | ||
233 | /* Note: The enable mask must be encoded in the config argument. */ | |
234 | static inline void perf_ibs_enable_event(struct hw_perf_event *hwc, u64 config) | |
235 | { | |
236 | wrmsrl(hwc->config_base, hwc->config | config); | |
237 | } | |
238 | ||
239 | /* | |
240 | * We cannot restore the ibs pmu state, so we always needs to update | |
241 | * the event while stopping it and then reset the state when starting | |
242 | * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in | |
243 | * perf_ibs_start()/perf_ibs_stop() and instead always do it. | |
244 | */ | |
4db2e8e6 RR |
245 | static void perf_ibs_start(struct perf_event *event, int flags) |
246 | { | |
247 | struct hw_perf_event *hwc = &event->hw; | |
248 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); | |
249 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | |
db98c5fa | 250 | u64 config; |
4db2e8e6 | 251 | |
db98c5fa | 252 | if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) |
4db2e8e6 RR |
253 | return; |
254 | ||
db98c5fa RR |
255 | WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); |
256 | hwc->state = 0; | |
257 | ||
258 | perf_ibs_set_period(perf_ibs, hwc, &config); | |
259 | config = (config >> 4) | perf_ibs->enable_mask; | |
260 | set_bit(IBS_STARTED, pcpu->state); | |
261 | perf_ibs_enable_event(hwc, config); | |
262 | ||
263 | perf_event_update_userpage(event); | |
4db2e8e6 RR |
264 | } |
265 | ||
266 | static void perf_ibs_stop(struct perf_event *event, int flags) | |
267 | { | |
268 | struct hw_perf_event *hwc = &event->hw; | |
269 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); | |
270 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | |
271 | u64 val; | |
db98c5fa | 272 | int stopping; |
4db2e8e6 | 273 | |
db98c5fa | 274 | stopping = test_and_clear_bit(IBS_STARTED, pcpu->state); |
4db2e8e6 | 275 | |
db98c5fa RR |
276 | if (!stopping && (hwc->state & PERF_HES_UPTODATE)) |
277 | return; | |
4db2e8e6 RR |
278 | |
279 | rdmsrl(hwc->config_base, val); | |
db98c5fa RR |
280 | |
281 | if (stopping) { | |
282 | set_bit(IBS_STOPPING, pcpu->state); | |
283 | val &= ~perf_ibs->enable_mask; | |
284 | wrmsrl(hwc->config_base, val); | |
285 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | |
286 | hwc->state |= PERF_HES_STOPPED; | |
287 | } | |
288 | ||
289 | if (hwc->state & PERF_HES_UPTODATE) | |
290 | return; | |
291 | ||
292 | perf_ibs_event_update(perf_ibs, event, val); | |
293 | hwc->state |= PERF_HES_UPTODATE; | |
4db2e8e6 RR |
294 | } |
295 | ||
b7169166 RR |
296 | static int perf_ibs_add(struct perf_event *event, int flags) |
297 | { | |
4db2e8e6 RR |
298 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); |
299 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | |
300 | ||
301 | if (test_and_set_bit(IBS_ENABLED, pcpu->state)) | |
302 | return -ENOSPC; | |
303 | ||
db98c5fa RR |
304 | event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; |
305 | ||
4db2e8e6 RR |
306 | pcpu->event = event; |
307 | ||
308 | if (flags & PERF_EF_START) | |
309 | perf_ibs_start(event, PERF_EF_RELOAD); | |
310 | ||
b7169166 RR |
311 | return 0; |
312 | } | |
313 | ||
314 | static void perf_ibs_del(struct perf_event *event, int flags) | |
315 | { | |
4db2e8e6 RR |
316 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); |
317 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | |
318 | ||
319 | if (!test_and_clear_bit(IBS_ENABLED, pcpu->state)) | |
320 | return; | |
321 | ||
db98c5fa | 322 | perf_ibs_stop(event, PERF_EF_UPDATE); |
4db2e8e6 RR |
323 | |
324 | pcpu->event = NULL; | |
db98c5fa RR |
325 | |
326 | perf_event_update_userpage(event); | |
b7169166 RR |
327 | } |
328 | ||
4db2e8e6 RR |
329 | static void perf_ibs_read(struct perf_event *event) { } |
330 | ||
51041943 RR |
331 | static struct perf_ibs perf_ibs_fetch = { |
332 | .pmu = { | |
333 | .task_ctx_nr = perf_invalid_context, | |
334 | ||
335 | .event_init = perf_ibs_init, | |
336 | .add = perf_ibs_add, | |
337 | .del = perf_ibs_del, | |
4db2e8e6 RR |
338 | .start = perf_ibs_start, |
339 | .stop = perf_ibs_stop, | |
340 | .read = perf_ibs_read, | |
51041943 RR |
341 | }, |
342 | .msr = MSR_AMD64_IBSFETCHCTL, | |
343 | .config_mask = IBS_FETCH_CONFIG_MASK, | |
344 | .cnt_mask = IBS_FETCH_MAX_CNT, | |
345 | .enable_mask = IBS_FETCH_ENABLE, | |
b7074f1f | 346 | .valid_mask = IBS_FETCH_VAL, |
db98c5fa | 347 | .max_period = IBS_FETCH_MAX_CNT << 4, |
b7074f1f RR |
348 | .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, |
349 | .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, | |
db98c5fa RR |
350 | |
351 | .get_count = get_ibs_fetch_count, | |
51041943 RR |
352 | }; |
353 | ||
354 | static struct perf_ibs perf_ibs_op = { | |
355 | .pmu = { | |
356 | .task_ctx_nr = perf_invalid_context, | |
357 | ||
358 | .event_init = perf_ibs_init, | |
359 | .add = perf_ibs_add, | |
360 | .del = perf_ibs_del, | |
4db2e8e6 RR |
361 | .start = perf_ibs_start, |
362 | .stop = perf_ibs_stop, | |
363 | .read = perf_ibs_read, | |
51041943 RR |
364 | }, |
365 | .msr = MSR_AMD64_IBSOPCTL, | |
366 | .config_mask = IBS_OP_CONFIG_MASK, | |
367 | .cnt_mask = IBS_OP_MAX_CNT, | |
368 | .enable_mask = IBS_OP_ENABLE, | |
b7074f1f | 369 | .valid_mask = IBS_OP_VAL, |
db98c5fa | 370 | .max_period = IBS_OP_MAX_CNT << 4, |
b7074f1f RR |
371 | .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, |
372 | .offset_max = MSR_AMD64_IBSOP_REG_COUNT, | |
db98c5fa RR |
373 | |
374 | .get_count = get_ibs_op_count, | |
b7169166 RR |
375 | }; |
376 | ||
b7074f1f RR |
377 | static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) |
378 | { | |
4db2e8e6 RR |
379 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); |
380 | struct perf_event *event = pcpu->event; | |
b7074f1f RR |
381 | struct hw_perf_event *hwc = &event->hw; |
382 | struct perf_sample_data data; | |
383 | struct perf_raw_record raw; | |
384 | struct pt_regs regs; | |
385 | struct perf_ibs_data ibs_data; | |
d47e8238 | 386 | int offset, size, check_rip, offset_max, throttle = 0; |
b7074f1f | 387 | unsigned int msr; |
db98c5fa | 388 | u64 *buf, config; |
b7074f1f | 389 | |
4db2e8e6 RR |
390 | if (!test_bit(IBS_STARTED, pcpu->state)) { |
391 | /* Catch spurious interrupts after stopping IBS: */ | |
392 | if (!test_and_clear_bit(IBS_STOPPING, pcpu->state)) | |
393 | return 0; | |
394 | rdmsrl(perf_ibs->msr, *ibs_data.regs); | |
395 | return (*ibs_data.regs & perf_ibs->valid_mask) ? 1 : 0; | |
396 | } | |
397 | ||
b7074f1f RR |
398 | msr = hwc->config_base; |
399 | buf = ibs_data.regs; | |
400 | rdmsrl(msr, *buf); | |
401 | if (!(*buf++ & perf_ibs->valid_mask)) | |
402 | return 0; | |
403 | ||
c75841a3 RR |
404 | /* |
405 | * Emulate IbsOpCurCnt in MSRC001_1033 (IbsOpCtl), not | |
406 | * supported in all cpus. As this triggered an interrupt, we | |
407 | * set the current count to the max count. | |
408 | */ | |
409 | config = ibs_data.regs[0]; | |
410 | if (perf_ibs == &perf_ibs_op && !(ibs_caps & IBS_CAPS_RDWROPCNT)) { | |
411 | config &= ~IBS_OP_CUR_CNT; | |
412 | config |= (config & IBS_OP_MAX_CNT) << 36; | |
413 | } | |
414 | ||
415 | perf_ibs_event_update(perf_ibs, event, config); | |
fd0d000b | 416 | perf_sample_data_init(&data, 0, hwc->last_period); |
d47e8238 RR |
417 | if (!perf_ibs_set_period(perf_ibs, hwc, &config)) |
418 | goto out; /* no sw counter overflow */ | |
419 | ||
420 | ibs_data.caps = ibs_caps; | |
421 | size = 1; | |
422 | offset = 1; | |
423 | check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK)); | |
424 | if (event->attr.sample_type & PERF_SAMPLE_RAW) | |
425 | offset_max = perf_ibs->offset_max; | |
426 | else if (check_rip) | |
427 | offset_max = 2; | |
428 | else | |
429 | offset_max = 1; | |
430 | do { | |
431 | rdmsrl(msr + offset, *buf++); | |
432 | size++; | |
433 | offset = find_next_bit(perf_ibs->offset_mask, | |
434 | perf_ibs->offset_max, | |
435 | offset + 1); | |
436 | } while (offset < offset_max); | |
437 | ibs_data.size = sizeof(u64) * size; | |
438 | ||
439 | regs = *iregs; | |
440 | if (!check_rip || !(ibs_data.regs[2] & IBS_RIP_INVALID)) | |
441 | instruction_pointer_set(®s, ibs_data.regs[1]); | |
c75841a3 | 442 | |
b7074f1f | 443 | if (event->attr.sample_type & PERF_SAMPLE_RAW) { |
d47e8238 | 444 | raw.size = sizeof(u32) + ibs_data.size; |
b7074f1f RR |
445 | raw.data = ibs_data.data; |
446 | data.raw = &raw; | |
447 | } | |
448 | ||
d47e8238 RR |
449 | throttle = perf_event_overflow(event, &data, ®s); |
450 | out: | |
451 | config = (config >> 4) | (throttle ? 0 : perf_ibs->enable_mask); | |
db98c5fa RR |
452 | perf_ibs_enable_event(hwc, config); |
453 | ||
454 | perf_event_update_userpage(event); | |
b7074f1f RR |
455 | |
456 | return 1; | |
457 | } | |
458 | ||
459 | static int __kprobes | |
460 | perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) | |
461 | { | |
462 | int handled = 0; | |
463 | ||
464 | handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs); | |
465 | handled += perf_ibs_handle_irq(&perf_ibs_op, regs); | |
466 | ||
467 | if (handled) | |
468 | inc_irq_stat(apic_perf_irqs); | |
469 | ||
470 | return handled; | |
471 | } | |
472 | ||
4db2e8e6 RR |
473 | static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) |
474 | { | |
475 | struct cpu_perf_ibs __percpu *pcpu; | |
476 | int ret; | |
477 | ||
478 | pcpu = alloc_percpu(struct cpu_perf_ibs); | |
479 | if (!pcpu) | |
480 | return -ENOMEM; | |
481 | ||
482 | perf_ibs->pcpu = pcpu; | |
483 | ||
484 | ret = perf_pmu_register(&perf_ibs->pmu, name, -1); | |
485 | if (ret) { | |
486 | perf_ibs->pcpu = NULL; | |
487 | free_percpu(pcpu); | |
488 | } | |
489 | ||
490 | return ret; | |
491 | } | |
492 | ||
b7169166 RR |
493 | static __init int perf_event_ibs_init(void) |
494 | { | |
495 | if (!ibs_caps) | |
496 | return -ENODEV; /* ibs not supported by the cpu */ | |
497 | ||
4db2e8e6 | 498 | perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); |
7bf35238 RR |
499 | if (ibs_caps & IBS_CAPS_OPCNT) |
500 | perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; | |
4db2e8e6 | 501 | perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); |
fab06992 | 502 | register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); |
b7169166 RR |
503 | printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); |
504 | ||
505 | return 0; | |
506 | } | |
507 | ||
508 | #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ | |
509 | ||
510 | static __init int perf_event_ibs_init(void) { return 0; } | |
511 | ||
512 | #endif | |
513 | ||
514 | /* IBS - apic initialization, for perf and oprofile */ | |
515 | ||
516 | static __init u32 __get_ibs_caps(void) | |
517 | { | |
518 | u32 caps; | |
519 | unsigned int max_level; | |
520 | ||
521 | if (!boot_cpu_has(X86_FEATURE_IBS)) | |
522 | return 0; | |
523 | ||
524 | /* check IBS cpuid feature flags */ | |
525 | max_level = cpuid_eax(0x80000000); | |
526 | if (max_level < IBS_CPUID_FEATURES) | |
527 | return IBS_CAPS_DEFAULT; | |
528 | ||
529 | caps = cpuid_eax(IBS_CPUID_FEATURES); | |
530 | if (!(caps & IBS_CAPS_AVAIL)) | |
531 | /* cpuid flags not valid */ | |
532 | return IBS_CAPS_DEFAULT; | |
533 | ||
534 | return caps; | |
535 | } | |
536 | ||
537 | u32 get_ibs_caps(void) | |
538 | { | |
539 | return ibs_caps; | |
540 | } | |
541 | ||
542 | EXPORT_SYMBOL(get_ibs_caps); | |
543 | ||
544 | static inline int get_eilvt(int offset) | |
545 | { | |
546 | return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); | |
547 | } | |
548 | ||
/* Release a previously reserved extended-interrupt LVT entry. */
static inline int put_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, 0, 1);
}
553 | ||
554 | /* | |
555 | * Check and reserve APIC extended interrupt LVT offset for IBS if available. | |
556 | */ | |
557 | static inline int ibs_eilvt_valid(void) | |
558 | { | |
559 | int offset; | |
560 | u64 val; | |
561 | int valid = 0; | |
562 | ||
563 | preempt_disable(); | |
564 | ||
565 | rdmsrl(MSR_AMD64_IBSCTL, val); | |
566 | offset = val & IBSCTL_LVT_OFFSET_MASK; | |
567 | ||
568 | if (!(val & IBSCTL_LVT_OFFSET_VALID)) { | |
569 | pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", | |
570 | smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); | |
571 | goto out; | |
572 | } | |
573 | ||
574 | if (!get_eilvt(offset)) { | |
575 | pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", | |
576 | smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); | |
577 | goto out; | |
578 | } | |
579 | ||
580 | valid = 1; | |
581 | out: | |
582 | preempt_enable(); | |
583 | ||
584 | return valid; | |
585 | } | |
586 | ||
587 | static int setup_ibs_ctl(int ibs_eilvt_off) | |
588 | { | |
589 | struct pci_dev *cpu_cfg; | |
590 | int nodes; | |
591 | u32 value = 0; | |
592 | ||
593 | nodes = 0; | |
594 | cpu_cfg = NULL; | |
595 | do { | |
596 | cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, | |
597 | PCI_DEVICE_ID_AMD_10H_NB_MISC, | |
598 | cpu_cfg); | |
599 | if (!cpu_cfg) | |
600 | break; | |
601 | ++nodes; | |
602 | pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off | |
603 | | IBSCTL_LVT_OFFSET_VALID); | |
604 | pci_read_config_dword(cpu_cfg, IBSCTL, &value); | |
605 | if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { | |
606 | pci_dev_put(cpu_cfg); | |
607 | printk(KERN_DEBUG "Failed to setup IBS LVT offset, " | |
608 | "IBSCTL = 0x%08x\n", value); | |
609 | return -EINVAL; | |
610 | } | |
611 | } while (1); | |
612 | ||
613 | if (!nodes) { | |
614 | printk(KERN_DEBUG "No CPU node configured for IBS\n"); | |
615 | return -ENODEV; | |
616 | } | |
617 | ||
618 | return 0; | |
619 | } | |
620 | ||
621 | /* | |
622 | * This runs only on the current cpu. We try to find an LVT offset and | |
623 | * setup the local APIC. For this we must disable preemption. On | |
624 | * success we initialize all nodes with this offset. This updates then | |
625 | * the offset in the IBS_CTL per-node msr. The per-core APIC setup of | |
626 | * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that | |
627 | * is using the new offset. | |
628 | */ | |
629 | static int force_ibs_eilvt_setup(void) | |
630 | { | |
631 | int offset; | |
632 | int ret; | |
633 | ||
634 | preempt_disable(); | |
635 | /* find the next free available EILVT entry, skip offset 0 */ | |
636 | for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { | |
637 | if (get_eilvt(offset)) | |
638 | break; | |
639 | } | |
640 | preempt_enable(); | |
641 | ||
642 | if (offset == APIC_EILVT_NR_MAX) { | |
643 | printk(KERN_DEBUG "No EILVT entry available\n"); | |
644 | return -EBUSY; | |
645 | } | |
646 | ||
647 | ret = setup_ibs_ctl(offset); | |
648 | if (ret) | |
649 | goto out; | |
650 | ||
651 | if (!ibs_eilvt_valid()) { | |
652 | ret = -EFAULT; | |
653 | goto out; | |
654 | } | |
655 | ||
16e5294e | 656 | pr_info("IBS: LVT offset %d assigned\n", offset); |
b7169166 RR |
657 | |
658 | return 0; | |
659 | out: | |
660 | preempt_disable(); | |
661 | put_eilvt(offset); | |
662 | preempt_enable(); | |
663 | return ret; | |
664 | } | |
665 | ||
666 | static inline int get_ibs_lvt_offset(void) | |
667 | { | |
668 | u64 val; | |
669 | ||
670 | rdmsrl(MSR_AMD64_IBSCTL, val); | |
671 | if (!(val & IBSCTL_LVT_OFFSET_VALID)) | |
672 | return -EINVAL; | |
673 | ||
674 | return val & IBSCTL_LVT_OFFSET_MASK; | |
675 | } | |
676 | ||
677 | static void setup_APIC_ibs(void *dummy) | |
678 | { | |
679 | int offset; | |
680 | ||
681 | offset = get_ibs_lvt_offset(); | |
682 | if (offset < 0) | |
683 | goto failed; | |
684 | ||
685 | if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) | |
686 | return; | |
687 | failed: | |
688 | pr_warn("perf: IBS APIC setup failed on cpu #%d\n", | |
689 | smp_processor_id()); | |
690 | } | |
691 | ||
692 | static void clear_APIC_ibs(void *dummy) | |
693 | { | |
694 | int offset; | |
695 | ||
696 | offset = get_ibs_lvt_offset(); | |
697 | if (offset >= 0) | |
698 | setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); | |
699 | } | |
700 | ||
701 | static int __cpuinit | |
702 | perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | |
703 | { | |
704 | switch (action & ~CPU_TASKS_FROZEN) { | |
705 | case CPU_STARTING: | |
706 | setup_APIC_ibs(NULL); | |
707 | break; | |
708 | case CPU_DYING: | |
709 | clear_APIC_ibs(NULL); | |
710 | break; | |
711 | default: | |
712 | break; | |
713 | } | |
714 | ||
715 | return NOTIFY_OK; | |
716 | } | |
717 | ||
718 | static __init int amd_ibs_init(void) | |
719 | { | |
720 | u32 caps; | |
16e5294e | 721 | int ret = -EINVAL; |
b7169166 RR |
722 | |
723 | caps = __get_ibs_caps(); | |
724 | if (!caps) | |
725 | return -ENODEV; /* ibs not supported by the cpu */ | |
726 | ||
16e5294e RR |
727 | /* |
728 | * Force LVT offset assignment for family 10h: The offsets are | |
729 | * not assigned by the BIOS for this family, so the OS is | |
730 | * responsible for doing it. If the OS assignment fails, fall | |
731 | * back to BIOS settings and try to setup this. | |
732 | */ | |
733 | if (boot_cpu_data.x86 == 0x10) | |
734 | force_ibs_eilvt_setup(); | |
735 | ||
736 | if (!ibs_eilvt_valid()) | |
737 | goto out; | |
b7169166 RR |
738 | |
739 | get_online_cpus(); | |
740 | ibs_caps = caps; | |
741 | /* make ibs_caps visible to other cpus: */ | |
742 | smp_mb(); | |
743 | perf_cpu_notifier(perf_ibs_cpu_notifier); | |
744 | smp_call_function(setup_APIC_ibs, NULL, 1); | |
745 | put_online_cpus(); | |
746 | ||
16e5294e RR |
747 | ret = perf_event_ibs_init(); |
748 | out: | |
749 | if (ret) | |
750 | pr_err("Failed to setup IBS, %d\n", ret); | |
751 | return ret; | |
b7169166 RR |
752 | } |
753 | ||
/* Since we need the pci subsystem to init ibs we can't do this earlier: */
device_initcall(amd_ibs_init);