]>
Commit | Line | Data |
---|---|---|
b7169166 RR |
1 | /* |
2 | * Performance events - AMD IBS | |
3 | * | |
4 | * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter | |
5 | * | |
6 | * For licencing details see kernel-base/COPYING | |
7 | */ | |
8 | ||
9 | #include <linux/perf_event.h> | |
10 | #include <linux/module.h> | |
11 | #include <linux/pci.h> | |
d47e8238 | 12 | #include <linux/ptrace.h> |
b7169166 RR |
13 | |
14 | #include <asm/apic.h> | |
15 | ||
/* Cached IBS capability bits; 0 until amd_ibs_init() validates the cpu. */
static u32 ibs_caps;

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
19 | ||
b7074f1f RR |
20 | #include <linux/kprobes.h> |
21 | #include <linux/hardirq.h> | |
22 | ||
23 | #include <asm/nmi.h> | |
24 | ||
51041943 RR |
/* Config bits a user may set for fetch sampling. */
#define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
/* Config bits a user may set for op sampling. */
#define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
27 | ||
4db2e8e6 RR |
/*
 * Per-cpu IBS state bits, kept in cpu_perf_ibs::state and manipulated
 * with the atomic bitops so the NMI handler sees consistent state.
 */
enum ibs_states {
	IBS_ENABLED	= 0,	/* an event is bound to this cpu */
	IBS_STARTED	= 1,	/* hardware counting is active */
	IBS_STOPPING	= 2,	/* stopped; late NMIs are spurious */

	IBS_MAX_STATES,
};
35 | ||
/* Per-cpu IBS state: the single active event plus its state bitmap. */
struct cpu_perf_ibs {
	struct perf_event	*event;				/* current event, NULL if none */
	unsigned long		state[BITS_TO_LONGS(IBS_MAX_STATES)];	/* ibs_states bitmap */
};
40 | ||
51041943 RR |
/*
 * Descriptor of one IBS pmu (fetch or op): the control MSR, the bit
 * layout of its config, and the per-cpu event state.
 */
struct perf_ibs {
	struct pmu			pmu;		/* pmu registered with the perf core */
	unsigned int			msr;		/* control MSR of this unit */
	u64				config_mask;	/* user-settable config bits */
	u64				cnt_mask;	/* max-cnt field inside the config */
	u64				enable_mask;	/* bit that starts the counter */
	u64				valid_mask;	/* bit flagging valid sample data */
	u64				max_period;	/* largest programmable period */
	unsigned long			offset_mask[1];	/* bitmap of sample-data MSR offsets */
	int				offset_max;	/* number of sample-data MSRs */
	struct cpu_perf_ibs __percpu	*pcpu;		/* per-cpu state */
	u64				(*get_count)(u64 config); /* current count from config */
};
54 | ||
/*
 * Raw sample buffer handed to perf: the caps word followed by the
 * IBS sample-data MSR values collected in the NMI handler.
 */
struct perf_ibs_data {
	u32		size;		/* payload size in bytes */
	union {
		u32	data[0];	/* data buffer starts here */
		u32	caps;		/* ibs_caps, first word of the buffer */
	};
	u64		regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
63 | ||
/*
 * Compute the next hardware period from hwc->period_left, clamped to
 * [min, max], and store it in *hw_period.  Returns nonzero when the
 * sw counter overflowed, i.e. a sample must be generated.
 */
static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int overflow = 0;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	if (unlikely(left < (s64)min)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	/*
	 * If the hw period that triggers the sw overflow is too short
	 * we might hit the irq handler. This biases the results.
	 * Thus we shorten the next-to-last period and set the last
	 * period to the max period.
	 */
	if (left > max) {
		left -= max;
		if (left > max)
			left = max;
		else if (left < min)
			left = min;
	}

	*hw_period = (u64)left;

	return overflow;
}
106 | ||
107 | static int | |
108 | perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width) | |
109 | { | |
110 | struct hw_perf_event *hwc = &event->hw; | |
111 | int shift = 64 - width; | |
112 | u64 prev_raw_count; | |
113 | u64 delta; | |
114 | ||
115 | /* | |
116 | * Careful: an NMI might modify the previous event value. | |
117 | * | |
118 | * Our tactic to handle this is to first atomically read and | |
119 | * exchange a new raw count - then add that new-prev delta | |
120 | * count to the generic event atomically: | |
121 | */ | |
122 | prev_raw_count = local64_read(&hwc->prev_count); | |
123 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | |
124 | new_raw_count) != prev_raw_count) | |
125 | return 0; | |
126 | ||
127 | /* | |
128 | * Now we have the new raw value and have updated the prev | |
129 | * timestamp already. We can now calculate the elapsed delta | |
130 | * (event-)time and add that to the generic event. | |
131 | * | |
132 | * Careful, not all hw sign-extends above the physical width | |
133 | * of the count. | |
134 | */ | |
135 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | |
136 | delta >>= shift; | |
137 | ||
138 | local64_add(delta, &event->count); | |
139 | local64_sub(delta, &hwc->period_left); | |
140 | ||
141 | return 1; | |
142 | } | |
143 | ||
51041943 RR |
144 | static struct perf_ibs perf_ibs_fetch; |
145 | static struct perf_ibs perf_ibs_op; | |
146 | ||
147 | static struct perf_ibs *get_ibs_pmu(int type) | |
148 | { | |
149 | if (perf_ibs_fetch.pmu.type == type) | |
150 | return &perf_ibs_fetch; | |
151 | if (perf_ibs_op.pmu.type == type) | |
152 | return &perf_ibs_op; | |
153 | return NULL; | |
154 | } | |
b7169166 | 155 | |
450bbd49 RR |
156 | /* |
157 | * Use IBS for precise event sampling: | |
158 | * | |
159 | * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count | |
160 | * perf record -a -e r076:p ... # same as -e cpu-cycles:p | |
161 | * perf record -a -e r0C1:p ... # use ibs op counting micro-ops | |
162 | * | |
163 | * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl, | |
164 | * MSRC001_1033) is used to select either cycle or micro-ops counting | |
165 | * mode. | |
166 | * | |
167 | * The rip of IBS samples has skid 0. Thus, IBS supports precise | |
168 | * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the | |
169 | * rip is invalid when IBS was not able to record the rip correctly. | |
170 | * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. | |
171 | * | |
172 | */ | |
173 | static int perf_ibs_precise_event(struct perf_event *event, u64 *config) | |
174 | { | |
175 | switch (event->attr.precise_ip) { | |
176 | case 0: | |
177 | return -ENOENT; | |
178 | case 1: | |
179 | case 2: | |
180 | break; | |
181 | default: | |
182 | return -EOPNOTSUPP; | |
183 | } | |
184 | ||
185 | switch (event->attr.type) { | |
186 | case PERF_TYPE_HARDWARE: | |
187 | switch (event->attr.config) { | |
188 | case PERF_COUNT_HW_CPU_CYCLES: | |
189 | *config = 0; | |
190 | return 0; | |
191 | } | |
192 | break; | |
193 | case PERF_TYPE_RAW: | |
194 | switch (event->attr.config) { | |
195 | case 0x0076: | |
196 | *config = 0; | |
197 | return 0; | |
198 | case 0x00C1: | |
199 | *config = IBS_OP_CNT_CTL; | |
200 | return 0; | |
201 | } | |
202 | break; | |
203 | default: | |
204 | return -ENOENT; | |
205 | } | |
206 | ||
207 | return -EOPNOTSUPP; | |
208 | } | |
209 | ||
b7169166 RR |
/*
 * Validate the event attributes and derive the hw config.  Events
 * reach this either by targeting an IBS pmu type directly, or via the
 * precise-event (:p) fallback which is mapped onto the op pmu.
 */
static int perf_ibs_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs;
	u64 max_cnt, config;
	int ret;

	perf_ibs = get_ibs_pmu(event->attr.type);
	if (perf_ibs) {
		config = event->attr.config;
	} else {
		/* not one of our types: try the precise-event mapping */
		perf_ibs = &perf_ibs_op;
		ret = perf_ibs_precise_event(event, &config);
		if (ret)
			return ret;
	}

	if (event->pmu != &perf_ibs->pmu)
		return -ENOENT;

	if (config & ~perf_ibs->config_mask)
		return -EINVAL;

	if (hwc->sample_period) {
		if (config & perf_ibs->cnt_mask)
			/* raw max_cnt may not be set */
			return -EINVAL;
		if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
			/*
			 * lower 4 bits can not be set in ibs max cnt,
			 * but allowing it in case we adjust the
			 * sample period to set a frequency.
			 */
			return -EINVAL;
		/* hardware granularity is 16; round down, never to zero */
		hwc->sample_period &= ~0x0FULL;
		if (!hwc->sample_period)
			hwc->sample_period = 0x10;
	} else {
		/* no period given: take it from the raw config's max-cnt field */
		max_cnt = config & perf_ibs->cnt_mask;
		config &= ~perf_ibs->cnt_mask;
		event->attr.sample_period = max_cnt << 4;
		hwc->sample_period = event->attr.sample_period;
	}

	if (!hwc->sample_period)
		return -EINVAL;

	/*
	 * If we modify hwc->sample_period, we also need to update
	 * hwc->last_period and hwc->period_left.
	 */
	hwc->last_period = hwc->sample_period;
	local64_set(&hwc->period_left, hwc->sample_period);

	hwc->config_base = perf_ibs->msr;
	hwc->config = config;

	return 0;
}
269 | ||
db98c5fa RR |
270 | static int perf_ibs_set_period(struct perf_ibs *perf_ibs, |
271 | struct hw_perf_event *hwc, u64 *period) | |
272 | { | |
98112d2e | 273 | int overflow; |
db98c5fa RR |
274 | |
275 | /* ignore lower 4 bits in min count: */ | |
98112d2e | 276 | overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period); |
db98c5fa RR |
277 | local64_set(&hwc->prev_count, 0); |
278 | ||
98112d2e | 279 | return overflow; |
db98c5fa RR |
280 | } |
281 | ||
282 | static u64 get_ibs_fetch_count(u64 config) | |
283 | { | |
284 | return (config & IBS_FETCH_CNT) >> 12; | |
285 | } | |
286 | ||
287 | static u64 get_ibs_op_count(u64 config) | |
288 | { | |
289 | return (config & IBS_OP_CUR_CNT) >> 32; | |
290 | } | |
291 | ||
292 | static void | |
293 | perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, | |
c9574fe0 | 294 | u64 *config) |
db98c5fa | 295 | { |
c9574fe0 | 296 | u64 count = perf_ibs->get_count(*config); |
db98c5fa RR |
297 | |
298 | while (!perf_event_try_update(event, count, 20)) { | |
c9574fe0 RR |
299 | rdmsrl(event->hw.config_base, *config); |
300 | count = perf_ibs->get_count(*config); | |
db98c5fa RR |
301 | } |
302 | } | |
303 | ||
c9574fe0 RR |
/*
 * Program the control MSR with the event config plus the extra bits
 * in 'config' (typically the period's max-cnt field) and the enable
 * bit, all in a single write.
 */
static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
					 struct hw_perf_event *hwc, u64 config)
{
	wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
}
309 | ||
/*
 * Erratum #420 Instruction-Based Sampling Engine May Generate
 * Interrupt that Cannot Be Cleared:
 *
 * Must clear counter mask first, then clear the enable bit. See
 * Revision Guide for AMD Family 10h Processors, Publication #41322.
 *
 * NOTE: the two separate MSR writes below implement that required
 * ordering — do not merge them.
 */
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
					  struct hw_perf_event *hwc, u64 config)
{
	config &= ~perf_ibs->cnt_mask;
	wrmsrl(hwc->config_base, config);
	config &= ~perf_ibs->enable_mask;
	wrmsrl(hwc->config_base, config);
}
325 | ||
/*
 * We cannot restore the ibs pmu state, so we always needs to update
 * the event while stopping it and then reset the state when starting
 * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in
 * perf_ibs_start()/perf_ibs_stop() and instead always do it.
 */
static void perf_ibs_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 period;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
	hwc->state = 0;

	perf_ibs_set_period(perf_ibs, hwc, &period);
	/* mark started before enabling so the NMI handler accepts samples */
	set_bit(IBS_STARTED, pcpu->state);
	/* hardware takes the period in the max-cnt field, shifted right by 4 */
	perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

	perf_event_update_userpage(event);
}
351 | ||
/*
 * Stop the hardware and fold the final count into the event.  Since
 * the hw state cannot be restored later, the update is always done
 * here regardless of PERF_EF_UPDATE (see comment above perf_ibs_start).
 */
static void perf_ibs_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 config;
	int stopping;

	stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);

	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
		return;

	rdmsrl(hwc->config_base, config);

	if (stopping) {
		/* in-flight NMIs are flagged as spurious via IBS_STOPPING */
		set_bit(IBS_STOPPING, pcpu->state);
		perf_ibs_disable_event(perf_ibs, hwc, config);
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;
	}

	if (hwc->state & PERF_HES_UPTODATE)
		return;

	perf_ibs_event_update(perf_ibs, event, &config);
	hwc->state |= PERF_HES_UPTODATE;
}
380 | ||
b7169166 RR |
/*
 * Bind the event to this cpu.  Only one event per IBS pmu and cpu can
 * be active at a time; a second one is rejected with -ENOSPC.
 */
static int perf_ibs_add(struct perf_event *event, int flags)
{
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	if (test_and_set_bit(IBS_ENABLED, pcpu->state))
		return -ENOSPC;

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	pcpu->event = event;

	if (flags & PERF_EF_START)
		perf_ibs_start(event, PERF_EF_RELOAD);

	return 0;
}
398 | ||
/*
 * Unbind the event from this cpu, stopping the hardware and folding
 * in the final count first.
 */
static void perf_ibs_del(struct perf_event *event, int flags)
{
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
		return;

	/* PERF_EF_UPDATE: make sure the final count is accounted */
	perf_ibs_stop(event, PERF_EF_UPDATE);

	pcpu->event = NULL;

	perf_event_update_userpage(event);
}
413 | ||
4db2e8e6 RR |
/* No on-demand read: counts are folded in by perf_ibs_stop()/the NMI path. */
static void perf_ibs_read(struct perf_event *event) { }
415 | ||
51041943 RR |
/* Fetch-sampling pmu, driven by the IbsFetchCtl MSR. */
static struct perf_ibs perf_ibs_fetch = {
	.pmu = {
		.task_ctx_nr	= perf_invalid_context,

		.event_init	= perf_ibs_init,
		.add		= perf_ibs_add,
		.del		= perf_ibs_del,
		.start		= perf_ibs_start,
		.stop		= perf_ibs_stop,
		.read		= perf_ibs_read,
	},
	.msr			= MSR_AMD64_IBSFETCHCTL,
	.config_mask		= IBS_FETCH_CONFIG_MASK,
	.cnt_mask		= IBS_FETCH_MAX_CNT,
	.enable_mask		= IBS_FETCH_ENABLE,
	.valid_mask		= IBS_FETCH_VAL,
	.max_period		= IBS_FETCH_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSFETCH_REG_MASK },
	.offset_max		= MSR_AMD64_IBSFETCH_REG_COUNT,

	.get_count		= get_ibs_fetch_count,
};
438 | ||
/* Op-sampling pmu, driven by the IbsOpCtl MSR. */
static struct perf_ibs perf_ibs_op = {
	.pmu = {
		.task_ctx_nr	= perf_invalid_context,

		.event_init	= perf_ibs_init,
		.add		= perf_ibs_add,
		.del		= perf_ibs_del,
		.start		= perf_ibs_start,
		.stop		= perf_ibs_stop,
		.read		= perf_ibs_read,
	},
	.msr			= MSR_AMD64_IBSOPCTL,
	.config_mask		= IBS_OP_CONFIG_MASK,
	.cnt_mask		= IBS_OP_MAX_CNT,
	.enable_mask		= IBS_OP_ENABLE,
	.valid_mask		= IBS_OP_VAL,
	.max_period		= IBS_OP_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSOP_REG_MASK },
	.offset_max		= MSR_AMD64_IBSOP_REG_COUNT,

	.get_count		= get_ibs_op_count,
};
461 | ||
b7074f1f RR |
/*
 * NMI handler for one IBS pmu: collect the sample data MSRs, fold the
 * count into the event, emit a perf sample on sw overflow, and re-arm
 * (or disable, when throttled) the hardware.  Returns nonzero when
 * the interrupt belonged to this pmu.
 */
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	struct perf_event *event = pcpu->event;
	struct hw_perf_event *hwc = &event->hw;
	struct perf_sample_data data;
	struct perf_raw_record raw;
	struct pt_regs regs;
	struct perf_ibs_data ibs_data;
	int offset, size, check_rip, offset_max, throttle = 0;
	unsigned int msr;
	u64 *buf, *config, period;

	if (!test_bit(IBS_STARTED, pcpu->state)) {
		/* Catch spurious interrupts after stopping IBS: */
		if (!test_and_clear_bit(IBS_STOPPING, pcpu->state))
			return 0;
		rdmsrl(perf_ibs->msr, *ibs_data.regs);
		return (*ibs_data.regs & perf_ibs->valid_mask) ? 1 : 0;
	}

	msr = hwc->config_base;
	buf = ibs_data.regs;
	rdmsrl(msr, *buf);
	if (!(*buf++ & perf_ibs->valid_mask))
		return 0;	/* no valid sample: not our interrupt */

	/*
	 * Emulate IbsOpCurCnt in MSRC001_1033 (IbsOpCtl), not
	 * supported in all cpus. As this triggered an interrupt, we
	 * set the current count to the max count.
	 */
	config = &ibs_data.regs[0];
	if (perf_ibs == &perf_ibs_op && !(ibs_caps & IBS_CAPS_RDWROPCNT)) {
		*config &= ~IBS_OP_CUR_CNT;
		*config |= (*config & IBS_OP_MAX_CNT) << 36;
	}

	perf_ibs_event_update(perf_ibs, event, config);
	perf_sample_data_init(&data, 0, hwc->last_period);
	if (!perf_ibs_set_period(perf_ibs, hwc, &period))
		goto out;	/* no sw counter overflow */

	/* read only as many sample-data MSRs as the sample format needs */
	ibs_data.caps = ibs_caps;
	size = 1;
	offset = 1;
	check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
	if (event->attr.sample_type & PERF_SAMPLE_RAW)
		offset_max = perf_ibs->offset_max;
	else if (check_rip)
		offset_max = 2;
	else
		offset_max = 1;
	do {
		rdmsrl(msr + offset, *buf++);
		size++;
		offset = find_next_bit(perf_ibs->offset_mask,
				       perf_ibs->offset_max,
				       offset + 1);
	} while (offset < offset_max);
	ibs_data.size = sizeof(u64) * size;

	regs = *iregs;
	if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
		/* hardware could not record the rip; keep the interrupted one */
		regs.flags &= ~PERF_EFLAGS_EXACT;
	} else {
		instruction_pointer_set(&regs, ibs_data.regs[1]);
		regs.flags |= PERF_EFLAGS_EXACT;
	}

	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw.size = sizeof(u32) + ibs_data.size;
		raw.data = ibs_data.data;
		data.raw = &raw;
	}

	throttle = perf_event_overflow(event, &data, &regs);
out:
	if (throttle)
		perf_ibs_disable_event(perf_ibs, hwc, *config);
	else
		perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

	perf_event_update_userpage(event);

	return 1;
}
549 | ||
550 | static int __kprobes | |
551 | perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) | |
552 | { | |
553 | int handled = 0; | |
554 | ||
555 | handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs); | |
556 | handled += perf_ibs_handle_irq(&perf_ibs_op, regs); | |
557 | ||
558 | if (handled) | |
559 | inc_irq_stat(apic_perf_irqs); | |
560 | ||
561 | return handled; | |
562 | } | |
563 | ||
4db2e8e6 RR |
564 | static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) |
565 | { | |
566 | struct cpu_perf_ibs __percpu *pcpu; | |
567 | int ret; | |
568 | ||
569 | pcpu = alloc_percpu(struct cpu_perf_ibs); | |
570 | if (!pcpu) | |
571 | return -ENOMEM; | |
572 | ||
573 | perf_ibs->pcpu = pcpu; | |
574 | ||
575 | ret = perf_pmu_register(&perf_ibs->pmu, name, -1); | |
576 | if (ret) { | |
577 | perf_ibs->pcpu = NULL; | |
578 | free_percpu(pcpu); | |
579 | } | |
580 | ||
581 | return ret; | |
582 | } | |
583 | ||
b7169166 RR |
584 | static __init int perf_event_ibs_init(void) |
585 | { | |
586 | if (!ibs_caps) | |
587 | return -ENODEV; /* ibs not supported by the cpu */ | |
588 | ||
4db2e8e6 | 589 | perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); |
7bf35238 RR |
590 | if (ibs_caps & IBS_CAPS_OPCNT) |
591 | perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; | |
4db2e8e6 | 592 | perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); |
fab06992 | 593 | register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); |
b7169166 RR |
594 | printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); |
595 | ||
596 | return 0; | |
597 | } | |
598 | ||
#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */

/* perf support not built in: nothing to set up, report success */
static __init int perf_event_ibs_init(void) { return 0; }

#endif
604 | ||
605 | /* IBS - apic initialization, for perf and oprofile */ | |
606 | ||
607 | static __init u32 __get_ibs_caps(void) | |
608 | { | |
609 | u32 caps; | |
610 | unsigned int max_level; | |
611 | ||
612 | if (!boot_cpu_has(X86_FEATURE_IBS)) | |
613 | return 0; | |
614 | ||
615 | /* check IBS cpuid feature flags */ | |
616 | max_level = cpuid_eax(0x80000000); | |
617 | if (max_level < IBS_CPUID_FEATURES) | |
618 | return IBS_CAPS_DEFAULT; | |
619 | ||
620 | caps = cpuid_eax(IBS_CPUID_FEATURES); | |
621 | if (!(caps & IBS_CAPS_AVAIL)) | |
622 | /* cpuid flags not valid */ | |
623 | return IBS_CAPS_DEFAULT; | |
624 | ||
625 | return caps; | |
626 | } | |
627 | ||
/* Return the cached IBS capability bits (0 when IBS is unavailable). */
u32 get_ibs_caps(void)
{
	return ibs_caps;
}

EXPORT_SYMBOL(get_ibs_caps);
634 | ||
/* Try to reserve the given APIC EILVT offset for NMI delivery; nonzero on success. */
static inline int get_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
}
639 | ||
/* Release a previously reserved APIC EILVT offset; nonzero on success. */
static inline int put_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, 0, 1);
}
644 | ||
/*
 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
 *
 * Reads the offset from IBS_CTL and verifies both the valid bit and
 * that the EILVT entry can actually be reserved.  Runs with
 * preemption disabled so the MSR read and EILVT check target the
 * same cpu.
 */
static inline int ibs_eilvt_valid(void)
{
	int offset;
	u64 val;
	int valid = 0;

	preempt_disable();

	rdmsrl(MSR_AMD64_IBSCTL, val);
	offset = val & IBSCTL_LVT_OFFSET_MASK;

	if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
		pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
		goto out;
	}

	if (!get_eilvt(offset)) {
		pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
		goto out;
	}

	valid = 1;
out:
	preempt_enable();

	return valid;
}
677 | ||
678 | static int setup_ibs_ctl(int ibs_eilvt_off) | |
679 | { | |
680 | struct pci_dev *cpu_cfg; | |
681 | int nodes; | |
682 | u32 value = 0; | |
683 | ||
684 | nodes = 0; | |
685 | cpu_cfg = NULL; | |
686 | do { | |
687 | cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, | |
688 | PCI_DEVICE_ID_AMD_10H_NB_MISC, | |
689 | cpu_cfg); | |
690 | if (!cpu_cfg) | |
691 | break; | |
692 | ++nodes; | |
693 | pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off | |
694 | | IBSCTL_LVT_OFFSET_VALID); | |
695 | pci_read_config_dword(cpu_cfg, IBSCTL, &value); | |
696 | if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { | |
697 | pci_dev_put(cpu_cfg); | |
698 | printk(KERN_DEBUG "Failed to setup IBS LVT offset, " | |
699 | "IBSCTL = 0x%08x\n", value); | |
700 | return -EINVAL; | |
701 | } | |
702 | } while (1); | |
703 | ||
704 | if (!nodes) { | |
705 | printk(KERN_DEBUG "No CPU node configured for IBS\n"); | |
706 | return -ENODEV; | |
707 | } | |
708 | ||
709 | return 0; | |
710 | } | |
711 | ||
/*
 * This runs only on the current cpu. We try to find an LVT offset and
 * setup the local APIC. For this we must disable preemption. On
 * success we initialize all nodes with this offset. This updates then
 * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
 * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that
 * is using the new offset.
 */
static int force_ibs_eilvt_setup(void)
{
	int offset;
	int ret;

	preempt_disable();
	/* find the next free available EILVT entry, skip offset 0 */
	for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
		if (get_eilvt(offset))
			break;
	}
	preempt_enable();

	if (offset == APIC_EILVT_NR_MAX) {
		printk(KERN_DEBUG "No EILVT entry available\n");
		return -EBUSY;
	}

	ret = setup_ibs_ctl(offset);
	if (ret)
		goto out;

	if (!ibs_eilvt_valid()) {
		ret = -EFAULT;
		goto out;
	}

	pr_info("IBS: LVT offset %d assigned\n", offset);

	return 0;
out:
	/* release the reserved entry again on any failure */
	preempt_disable();
	put_eilvt(offset);
	preempt_enable();
	return ret;
}
756 | ||
757 | static inline int get_ibs_lvt_offset(void) | |
758 | { | |
759 | u64 val; | |
760 | ||
761 | rdmsrl(MSR_AMD64_IBSCTL, val); | |
762 | if (!(val & IBSCTL_LVT_OFFSET_VALID)) | |
763 | return -EINVAL; | |
764 | ||
765 | return val & IBSCTL_LVT_OFFSET_MASK; | |
766 | } | |
767 | ||
768 | static void setup_APIC_ibs(void *dummy) | |
769 | { | |
770 | int offset; | |
771 | ||
772 | offset = get_ibs_lvt_offset(); | |
773 | if (offset < 0) | |
774 | goto failed; | |
775 | ||
776 | if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) | |
777 | return; | |
778 | failed: | |
779 | pr_warn("perf: IBS APIC setup failed on cpu #%d\n", | |
780 | smp_processor_id()); | |
781 | } | |
782 | ||
783 | static void clear_APIC_ibs(void *dummy) | |
784 | { | |
785 | int offset; | |
786 | ||
787 | offset = get_ibs_lvt_offset(); | |
788 | if (offset >= 0) | |
789 | setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); | |
790 | } | |
791 | ||
792 | static int __cpuinit | |
793 | perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | |
794 | { | |
795 | switch (action & ~CPU_TASKS_FROZEN) { | |
796 | case CPU_STARTING: | |
797 | setup_APIC_ibs(NULL); | |
798 | break; | |
799 | case CPU_DYING: | |
800 | clear_APIC_ibs(NULL); | |
801 | break; | |
802 | default: | |
803 | break; | |
804 | } | |
805 | ||
806 | return NOTIFY_OK; | |
807 | } | |
808 | ||
/*
 * Driver entry point: probe IBS capabilities, make sure a usable LVT
 * offset exists, program all online cpus' APICs and register the perf
 * pmus.  Runs as a device initcall (after PCI is available).
 */
static __init int amd_ibs_init(void)
{
	u32 caps;
	int ret = -EINVAL;

	caps = __get_ibs_caps();
	if (!caps)
		return -ENODEV;	/* ibs not supported by the cpu */

	/*
	 * Force LVT offset assignment for family 10h: The offsets are
	 * not assigned by the BIOS for this family, so the OS is
	 * responsible for doing it. If the OS assignment fails, fall
	 * back to BIOS settings and try to setup this.
	 */
	if (boot_cpu_data.x86 == 0x10)
		force_ibs_eilvt_setup();

	if (!ibs_eilvt_valid())
		goto out;

	get_online_cpus();
	ibs_caps = caps;
	/* make ibs_caps visible to other cpus: */
	smp_mb();
	perf_cpu_notifier(perf_ibs_cpu_notifier);
	smp_call_function(setup_APIC_ibs, NULL, 1);
	put_online_cpus();

	ret = perf_event_ibs_init();
out:
	if (ret)
		pr_err("Failed to setup IBS, %d\n", ret);
	return ret;
}
844 | ||
/* Since we need the pci subsystem to init ibs we can't do this earlier: */
/* (device_initcall runs after PCI enumeration, unlike arch initcalls) */
device_initcall(amd_ibs_init);