]>
Commit | Line | Data |
---|---|---|
b7169166 RR |
1 | /* |
2 | * Performance events - AMD IBS | |
3 | * | |
4 | * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter | |
5 | * | |
6 | * For licencing details see kernel-base/COPYING | |
7 | */ | |
8 | ||
9 | #include <linux/perf_event.h> | |
eb008eb6 PG |
10 | #include <linux/init.h> |
11 | #include <linux/export.h> | |
b7169166 | 12 | #include <linux/pci.h> |
d47e8238 | 13 | #include <linux/ptrace.h> |
bee09ed9 | 14 | #include <linux/syscore_ops.h> |
e6017571 | 15 | #include <linux/sched/clock.h> |
b7169166 RR |
16 | |
17 | #include <asm/apic.h> | |
18 | ||
27f6d22b | 19 | #include "../perf_event.h" |
d07bdfd3 | 20 | |
b7169166 RR |
21 | static u32 ibs_caps; |
22 | ||
23 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) | |
24 | ||
b7074f1f RR |
25 | #include <linux/kprobes.h> |
26 | #include <linux/hardirq.h> | |
27 | ||
28 | #include <asm/nmi.h> | |
29 | ||
51041943 RR |
/* Config bits user space may pass in attr.config for each IBS PMU. */
#define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
32 | ||
85dc6002 PZ |
33 | |
34 | /* | |
35 | * IBS states: | |
36 | * | |
37 | * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken | |
38 | * and any further add()s must fail. | |
39 | * | |
40 | * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are | |
41 | * complicated by the fact that the IBS hardware can send late NMIs (ie. after | |
42 | * we've cleared the EN bit). | |
43 | * | |
44 | * In order to consume these late NMIs we have the STOPPED state, any NMI that | |
45 | * happens after we've cleared the EN state will clear this bit and report the | |
46 | * NMI handled (this is fundamentally racy in the face of multiple NMI sources, | |
47 | * someone else can consume our BIT and our NMI will go unhandled). | |
48 | * | |
49 | * And since we cannot set/clear this separate bit together with the EN bit, | |
50 | * there are races; if we cleared STARTED early, an NMI could land in | |
51 | * between clearing STARTED and clearing the EN bit (in fact multiple NMIs | |
52 | * could happen if the period is small enough), and consume our STOPPED bit | |
53 | * and trigger streams of unhandled NMIs. | |
54 | * | |
55 | * If, however, we clear STARTED late, an NMI can hit between clearing the | |
56 | * EN bit and clearing STARTED, still see STARTED set and process the event. | |
57 | * If this event will have the VALID bit clear, we bail properly, but this | |
58 | * is not a given. With VALID set we can end up calling pmu::stop() again | |
59 | * (the throttle logic) and trigger the WARNs in there. | |
60 | * | |
61 | * So what we do is set STOPPING before clearing EN to avoid the pmu::stop() | |
62 | * nesting, and clear STARTED late, so that we have a well defined state over | |
63 | * the clearing of the EN bit. | |
64 | * | |
65 | * XXX: we could probably be using !atomic bitops for all this. | |
66 | */ | |
67 | ||
4db2e8e6 RR |
/*
 * Bit numbers used in cpu_perf_ibs::state (see the "IBS states" comment
 * for the ordering rules between them).
 */
enum ibs_states {
	IBS_ENABLED = 0,	/* set by perf_ibs_add(), cleared by perf_ibs_del() */
	IBS_STARTED,		/* set by perf_ibs_start(), cleared late in perf_ibs_stop() */
	IBS_STOPPING,		/* set on entry to perf_ibs_stop(), cleared by perf_ibs_start() */
	IBS_STOPPED,		/* set before disabling the hw, consumed by a late NMI */

	IBS_MAX_STATES,
};
76 | ||
77 | struct cpu_perf_ibs { | |
78 | struct perf_event *event; | |
79 | unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)]; | |
80 | }; | |
81 | ||
51041943 | 82 | struct perf_ibs { |
2e132b12 RR |
83 | struct pmu pmu; |
84 | unsigned int msr; | |
85 | u64 config_mask; | |
86 | u64 cnt_mask; | |
87 | u64 enable_mask; | |
88 | u64 valid_mask; | |
89 | u64 max_period; | |
90 | unsigned long offset_mask[1]; | |
91 | int offset_max; | |
92 | struct cpu_perf_ibs __percpu *pcpu; | |
93 | ||
94 | struct attribute **format_attrs; | |
95 | struct attribute_group format_group; | |
96 | const struct attribute_group *attr_groups[2]; | |
97 | ||
98 | u64 (*get_count)(u64 config); | |
b7074f1f RR |
99 | }; |
100 | ||
101 | struct perf_ibs_data { | |
102 | u32 size; | |
103 | union { | |
104 | u32 data[0]; /* data buffer starts here */ | |
105 | u32 caps; | |
106 | }; | |
107 | u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; | |
51041943 RR |
108 | }; |
109 | ||
db98c5fa | 110 | static int |
98112d2e | 111 | perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period) |
db98c5fa RR |
112 | { |
113 | s64 left = local64_read(&hwc->period_left); | |
114 | s64 period = hwc->sample_period; | |
115 | int overflow = 0; | |
116 | ||
117 | /* | |
118 | * If we are way outside a reasonable range then just skip forward: | |
119 | */ | |
120 | if (unlikely(left <= -period)) { | |
121 | left = period; | |
122 | local64_set(&hwc->period_left, left); | |
123 | hwc->last_period = period; | |
124 | overflow = 1; | |
125 | } | |
126 | ||
fc006cf7 | 127 | if (unlikely(left < (s64)min)) { |
db98c5fa RR |
128 | left += period; |
129 | local64_set(&hwc->period_left, left); | |
130 | hwc->last_period = period; | |
131 | overflow = 1; | |
132 | } | |
133 | ||
7caaf4d8 RR |
134 | /* |
135 | * If the hw period that triggers the sw overflow is too short | |
136 | * we might hit the irq handler. This biases the results. | |
137 | * Thus we shorten the next-to-last period and set the last | |
138 | * period to the max period. | |
139 | */ | |
140 | if (left > max) { | |
141 | left -= max; | |
142 | if (left > max) | |
143 | left = max; | |
144 | else if (left < min) | |
145 | left = min; | |
146 | } | |
db98c5fa | 147 | |
98112d2e | 148 | *hw_period = (u64)left; |
db98c5fa RR |
149 | |
150 | return overflow; | |
151 | } | |
152 | ||
153 | static int | |
154 | perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width) | |
155 | { | |
156 | struct hw_perf_event *hwc = &event->hw; | |
157 | int shift = 64 - width; | |
158 | u64 prev_raw_count; | |
159 | u64 delta; | |
160 | ||
161 | /* | |
162 | * Careful: an NMI might modify the previous event value. | |
163 | * | |
164 | * Our tactic to handle this is to first atomically read and | |
165 | * exchange a new raw count - then add that new-prev delta | |
166 | * count to the generic event atomically: | |
167 | */ | |
168 | prev_raw_count = local64_read(&hwc->prev_count); | |
169 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | |
170 | new_raw_count) != prev_raw_count) | |
171 | return 0; | |
172 | ||
173 | /* | |
174 | * Now we have the new raw value and have updated the prev | |
175 | * timestamp already. We can now calculate the elapsed delta | |
176 | * (event-)time and add that to the generic event. | |
177 | * | |
178 | * Careful, not all hw sign-extends above the physical width | |
179 | * of the count. | |
180 | */ | |
181 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | |
182 | delta >>= shift; | |
183 | ||
184 | local64_add(delta, &event->count); | |
185 | local64_sub(delta, &hwc->period_left); | |
186 | ||
187 | return 1; | |
188 | } | |
189 | ||
51041943 RR |
190 | static struct perf_ibs perf_ibs_fetch; |
191 | static struct perf_ibs perf_ibs_op; | |
192 | ||
193 | static struct perf_ibs *get_ibs_pmu(int type) | |
194 | { | |
195 | if (perf_ibs_fetch.pmu.type == type) | |
196 | return &perf_ibs_fetch; | |
197 | if (perf_ibs_op.pmu.type == type) | |
198 | return &perf_ibs_op; | |
199 | return NULL; | |
200 | } | |
b7169166 | 201 | |
450bbd49 RR |
202 | /* |
203 | * Use IBS for precise event sampling: | |
204 | * | |
205 | * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count | |
206 | * perf record -a -e r076:p ... # same as -e cpu-cycles:p | |
207 | * perf record -a -e r0C1:p ... # use ibs op counting micro-ops | |
208 | * | |
209 | * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl, | |
210 | * MSRC001_1033) is used to select either cycle or micro-ops counting | |
211 | * mode. | |
212 | * | |
213 | * The rip of IBS samples has skid 0. Thus, IBS supports precise | |
214 | * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the | |
215 | * rip is invalid when IBS was not able to record the rip correctly. | |
216 | * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. | |
217 | * | |
218 | */ | |
219 | static int perf_ibs_precise_event(struct perf_event *event, u64 *config) | |
220 | { | |
221 | switch (event->attr.precise_ip) { | |
222 | case 0: | |
223 | return -ENOENT; | |
224 | case 1: | |
225 | case 2: | |
226 | break; | |
227 | default: | |
228 | return -EOPNOTSUPP; | |
229 | } | |
230 | ||
231 | switch (event->attr.type) { | |
232 | case PERF_TYPE_HARDWARE: | |
233 | switch (event->attr.config) { | |
234 | case PERF_COUNT_HW_CPU_CYCLES: | |
235 | *config = 0; | |
236 | return 0; | |
237 | } | |
238 | break; | |
239 | case PERF_TYPE_RAW: | |
240 | switch (event->attr.config) { | |
241 | case 0x0076: | |
242 | *config = 0; | |
243 | return 0; | |
244 | case 0x00C1: | |
245 | *config = IBS_OP_CNT_CTL; | |
246 | return 0; | |
247 | } | |
248 | break; | |
249 | default: | |
250 | return -ENOENT; | |
251 | } | |
252 | ||
253 | return -EOPNOTSUPP; | |
254 | } | |
255 | ||
bad9ac2d RR |
256 | static const struct perf_event_attr ibs_notsupp = { |
257 | .exclude_user = 1, | |
258 | .exclude_kernel = 1, | |
259 | .exclude_hv = 1, | |
260 | .exclude_idle = 1, | |
261 | .exclude_host = 1, | |
262 | .exclude_guest = 1, | |
263 | }; | |
264 | ||
b7169166 RR |
265 | static int perf_ibs_init(struct perf_event *event) |
266 | { | |
51041943 RR |
267 | struct hw_perf_event *hwc = &event->hw; |
268 | struct perf_ibs *perf_ibs; | |
269 | u64 max_cnt, config; | |
450bbd49 | 270 | int ret; |
51041943 RR |
271 | |
272 | perf_ibs = get_ibs_pmu(event->attr.type); | |
450bbd49 RR |
273 | if (perf_ibs) { |
274 | config = event->attr.config; | |
275 | } else { | |
276 | perf_ibs = &perf_ibs_op; | |
277 | ret = perf_ibs_precise_event(event, &config); | |
278 | if (ret) | |
279 | return ret; | |
280 | } | |
281 | ||
282 | if (event->pmu != &perf_ibs->pmu) | |
b7169166 | 283 | return -ENOENT; |
51041943 | 284 | |
bad9ac2d RR |
285 | if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp)) |
286 | return -EINVAL; | |
287 | ||
51041943 RR |
288 | if (config & ~perf_ibs->config_mask) |
289 | return -EINVAL; | |
290 | ||
291 | if (hwc->sample_period) { | |
292 | if (config & perf_ibs->cnt_mask) | |
293 | /* raw max_cnt may not be set */ | |
294 | return -EINVAL; | |
6accb9cf RR |
295 | if (!event->attr.sample_freq && hwc->sample_period & 0x0f) |
296 | /* | |
297 | * lower 4 bits can not be set in ibs max cnt, | |
298 | * but allowing it in case we adjust the | |
299 | * sample period to set a frequency. | |
300 | */ | |
51041943 | 301 | return -EINVAL; |
6accb9cf RR |
302 | hwc->sample_period &= ~0x0FULL; |
303 | if (!hwc->sample_period) | |
304 | hwc->sample_period = 0x10; | |
51041943 RR |
305 | } else { |
306 | max_cnt = config & perf_ibs->cnt_mask; | |
db98c5fa | 307 | config &= ~perf_ibs->cnt_mask; |
51041943 RR |
308 | event->attr.sample_period = max_cnt << 4; |
309 | hwc->sample_period = event->attr.sample_period; | |
310 | } | |
311 | ||
db98c5fa | 312 | if (!hwc->sample_period) |
51041943 RR |
313 | return -EINVAL; |
314 | ||
6accb9cf RR |
315 | /* |
316 | * If we modify hwc->sample_period, we also need to update | |
317 | * hwc->last_period and hwc->period_left. | |
318 | */ | |
319 | hwc->last_period = hwc->sample_period; | |
320 | local64_set(&hwc->period_left, hwc->sample_period); | |
321 | ||
51041943 RR |
322 | hwc->config_base = perf_ibs->msr; |
323 | hwc->config = config; | |
324 | ||
b7169166 RR |
325 | return 0; |
326 | } | |
327 | ||
db98c5fa RR |
328 | static int perf_ibs_set_period(struct perf_ibs *perf_ibs, |
329 | struct hw_perf_event *hwc, u64 *period) | |
330 | { | |
98112d2e | 331 | int overflow; |
db98c5fa RR |
332 | |
333 | /* ignore lower 4 bits in min count: */ | |
98112d2e | 334 | overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period); |
db98c5fa RR |
335 | local64_set(&hwc->prev_count, 0); |
336 | ||
98112d2e | 337 | return overflow; |
db98c5fa RR |
338 | } |
339 | ||
340 | static u64 get_ibs_fetch_count(u64 config) | |
341 | { | |
342 | return (config & IBS_FETCH_CNT) >> 12; | |
343 | } | |
344 | ||
345 | static u64 get_ibs_op_count(u64 config) | |
346 | { | |
8b1e1363 RR |
347 | u64 count = 0; |
348 | ||
349 | if (config & IBS_OP_VAL) | |
350 | count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */ | |
351 | ||
352 | if (ibs_caps & IBS_CAPS_RDWROPCNT) | |
353 | count += (config & IBS_OP_CUR_CNT) >> 32; | |
354 | ||
355 | return count; | |
db98c5fa RR |
356 | } |
357 | ||
358 | static void | |
359 | perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, | |
c9574fe0 | 360 | u64 *config) |
db98c5fa | 361 | { |
c9574fe0 | 362 | u64 count = perf_ibs->get_count(*config); |
db98c5fa | 363 | |
8b1e1363 RR |
364 | /* |
365 | * Set width to 64 since we do not overflow on max width but | |
366 | * instead on max count. In perf_ibs_set_period() we clear | |
367 | * prev count manually on overflow. | |
368 | */ | |
369 | while (!perf_event_try_update(event, count, 64)) { | |
c9574fe0 RR |
370 | rdmsrl(event->hw.config_base, *config); |
371 | count = perf_ibs->get_count(*config); | |
db98c5fa RR |
372 | } |
373 | } | |
374 | ||
c9574fe0 RR |
375 | static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, |
376 | struct hw_perf_event *hwc, u64 config) | |
db98c5fa | 377 | { |
c9574fe0 RR |
378 | wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask); |
379 | } | |
380 | ||
381 | /* | |
382 | * Erratum #420 Instruction-Based Sampling Engine May Generate | |
383 | * Interrupt that Cannot Be Cleared: | |
384 | * | |
385 | * Must clear counter mask first, then clear the enable bit. See | |
386 | * Revision Guide for AMD Family 10h Processors, Publication #41322. | |
387 | */ | |
388 | static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, | |
389 | struct hw_perf_event *hwc, u64 config) | |
390 | { | |
391 | config &= ~perf_ibs->cnt_mask; | |
392 | wrmsrl(hwc->config_base, config); | |
393 | config &= ~perf_ibs->enable_mask; | |
394 | wrmsrl(hwc->config_base, config); | |
db98c5fa RR |
395 | } |
396 | ||
397 | /* | |
398 | * We cannot restore the ibs pmu state, so we always need to update | |
399 | * the event while stopping it and then reset the state when starting | |
400 | * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in | |
401 | * perf_ibs_start()/perf_ibs_stop() and instead always do it. | |
402 | */ | |
4db2e8e6 RR |
403 | static void perf_ibs_start(struct perf_event *event, int flags) |
404 | { | |
405 | struct hw_perf_event *hwc = &event->hw; | |
406 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); | |
407 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | |
c9574fe0 | 408 | u64 period; |
4db2e8e6 | 409 | |
db98c5fa | 410 | if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) |
4db2e8e6 RR |
411 | return; |
412 | ||
db98c5fa RR |
413 | WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); |
414 | hwc->state = 0; | |
415 | ||
c9574fe0 | 416 | perf_ibs_set_period(perf_ibs, hwc, &period); |
5a50f529 | 417 | /* |
85dc6002 PZ |
418 | * Set STARTED before enabling the hardware, such that a subsequent NMI |
419 | * must observe it. | |
5a50f529 | 420 | */ |
85dc6002 | 421 | set_bit(IBS_STARTED, pcpu->state); |
5a50f529 | 422 | clear_bit(IBS_STOPPING, pcpu->state); |
c9574fe0 | 423 | perf_ibs_enable_event(perf_ibs, hwc, period >> 4); |
db98c5fa RR |
424 | |
425 | perf_event_update_userpage(event); | |
4db2e8e6 RR |
426 | } |
427 | ||
428 | static void perf_ibs_stop(struct perf_event *event, int flags) | |
429 | { | |
430 | struct hw_perf_event *hwc = &event->hw; | |
431 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); | |
432 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | |
c9574fe0 | 433 | u64 config; |
db98c5fa | 434 | int stopping; |
4db2e8e6 | 435 | |
85dc6002 PZ |
436 | if (test_and_set_bit(IBS_STOPPING, pcpu->state)) |
437 | return; | |
438 | ||
5a50f529 | 439 | stopping = test_bit(IBS_STARTED, pcpu->state); |
4db2e8e6 | 440 | |
db98c5fa RR |
441 | if (!stopping && (hwc->state & PERF_HES_UPTODATE)) |
442 | return; | |
4db2e8e6 | 443 | |
c9574fe0 | 444 | rdmsrl(hwc->config_base, config); |
db98c5fa RR |
445 | |
446 | if (stopping) { | |
5a50f529 | 447 | /* |
85dc6002 | 448 | * Set STOPPED before disabling the hardware, such that it |
5a50f529 PZ |
449 | * must be visible to NMIs the moment we clear the EN bit, |
450 | * at which point we can generate an !VALID sample which | |
451 | * we need to consume. | |
452 | */ | |
85dc6002 | 453 | set_bit(IBS_STOPPED, pcpu->state); |
c9574fe0 | 454 | perf_ibs_disable_event(perf_ibs, hwc, config); |
5a50f529 PZ |
455 | /* |
456 | * Clear STARTED after disabling the hardware; if it were | |
457 | * cleared before an NMI hitting after the clear but before | |
458 | * clearing the EN bit might think it a spurious NMI and not | |
459 | * handle it. | |
460 | * | |
461 | * Clearing it after, however, creates the problem of the NMI | |
462 | * handler seeing STARTED but not having a valid sample. | |
463 | */ | |
464 | clear_bit(IBS_STARTED, pcpu->state); | |
db98c5fa RR |
465 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); |
466 | hwc->state |= PERF_HES_STOPPED; | |
467 | } | |
468 | ||
469 | if (hwc->state & PERF_HES_UPTODATE) | |
470 | return; | |
471 | ||
8b1e1363 RR |
472 | /* |
473 | * Clear valid bit to not count rollovers on update, rollovers | |
474 | * are only updated in the irq handler. | |
475 | */ | |
476 | config &= ~perf_ibs->valid_mask; | |
477 | ||
c9574fe0 | 478 | perf_ibs_event_update(perf_ibs, event, &config); |
db98c5fa | 479 | hwc->state |= PERF_HES_UPTODATE; |
4db2e8e6 RR |
480 | } |
481 | ||
b7169166 RR |
482 | static int perf_ibs_add(struct perf_event *event, int flags) |
483 | { | |
4db2e8e6 RR |
484 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); |
485 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | |
486 | ||
487 | if (test_and_set_bit(IBS_ENABLED, pcpu->state)) | |
488 | return -ENOSPC; | |
489 | ||
db98c5fa RR |
490 | event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; |
491 | ||
4db2e8e6 RR |
492 | pcpu->event = event; |
493 | ||
494 | if (flags & PERF_EF_START) | |
495 | perf_ibs_start(event, PERF_EF_RELOAD); | |
496 | ||
b7169166 RR |
497 | return 0; |
498 | } | |
499 | ||
500 | static void perf_ibs_del(struct perf_event *event, int flags) | |
501 | { | |
4db2e8e6 RR |
502 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); |
503 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | |
504 | ||
505 | if (!test_and_clear_bit(IBS_ENABLED, pcpu->state)) | |
506 | return; | |
507 | ||
db98c5fa | 508 | perf_ibs_stop(event, PERF_EF_UPDATE); |
4db2e8e6 RR |
509 | |
510 | pcpu->event = NULL; | |
db98c5fa RR |
511 | |
512 | perf_event_update_userpage(event); | |
b7169166 RR |
513 | } |
514 | ||
4db2e8e6 RR |
/* pmu::read(): intentionally empty. */
static void perf_ibs_read(struct perf_event *event)
{
}
516 | ||
2e132b12 RR |
517 | PMU_FORMAT_ATTR(rand_en, "config:57"); |
518 | PMU_FORMAT_ATTR(cnt_ctl, "config:19"); | |
519 | ||
520 | static struct attribute *ibs_fetch_format_attrs[] = { | |
521 | &format_attr_rand_en.attr, | |
522 | NULL, | |
523 | }; | |
524 | ||
525 | static struct attribute *ibs_op_format_attrs[] = { | |
526 | NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */ | |
527 | NULL, | |
528 | }; | |
529 | ||
51041943 RR |
530 | static struct perf_ibs perf_ibs_fetch = { |
531 | .pmu = { | |
532 | .task_ctx_nr = perf_invalid_context, | |
533 | ||
534 | .event_init = perf_ibs_init, | |
535 | .add = perf_ibs_add, | |
536 | .del = perf_ibs_del, | |
4db2e8e6 RR |
537 | .start = perf_ibs_start, |
538 | .stop = perf_ibs_stop, | |
539 | .read = perf_ibs_read, | |
51041943 RR |
540 | }, |
541 | .msr = MSR_AMD64_IBSFETCHCTL, | |
542 | .config_mask = IBS_FETCH_CONFIG_MASK, | |
543 | .cnt_mask = IBS_FETCH_MAX_CNT, | |
544 | .enable_mask = IBS_FETCH_ENABLE, | |
b7074f1f | 545 | .valid_mask = IBS_FETCH_VAL, |
db98c5fa | 546 | .max_period = IBS_FETCH_MAX_CNT << 4, |
b7074f1f RR |
547 | .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, |
548 | .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, | |
2e132b12 | 549 | .format_attrs = ibs_fetch_format_attrs, |
db98c5fa RR |
550 | |
551 | .get_count = get_ibs_fetch_count, | |
51041943 RR |
552 | }; |
553 | ||
554 | static struct perf_ibs perf_ibs_op = { | |
555 | .pmu = { | |
556 | .task_ctx_nr = perf_invalid_context, | |
557 | ||
558 | .event_init = perf_ibs_init, | |
559 | .add = perf_ibs_add, | |
560 | .del = perf_ibs_del, | |
4db2e8e6 RR |
561 | .start = perf_ibs_start, |
562 | .stop = perf_ibs_stop, | |
563 | .read = perf_ibs_read, | |
51041943 RR |
564 | }, |
565 | .msr = MSR_AMD64_IBSOPCTL, | |
566 | .config_mask = IBS_OP_CONFIG_MASK, | |
567 | .cnt_mask = IBS_OP_MAX_CNT, | |
568 | .enable_mask = IBS_OP_ENABLE, | |
b7074f1f | 569 | .valid_mask = IBS_OP_VAL, |
db98c5fa | 570 | .max_period = IBS_OP_MAX_CNT << 4, |
b7074f1f RR |
571 | .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, |
572 | .offset_max = MSR_AMD64_IBSOP_REG_COUNT, | |
2e132b12 | 573 | .format_attrs = ibs_op_format_attrs, |
db98c5fa RR |
574 | |
575 | .get_count = get_ibs_op_count, | |
b7169166 RR |
576 | }; |
577 | ||
b7074f1f RR |
578 | static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) |
579 | { | |
4db2e8e6 RR |
580 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); |
581 | struct perf_event *event = pcpu->event; | |
b7074f1f RR |
582 | struct hw_perf_event *hwc = &event->hw; |
583 | struct perf_sample_data data; | |
584 | struct perf_raw_record raw; | |
585 | struct pt_regs regs; | |
586 | struct perf_ibs_data ibs_data; | |
d47e8238 | 587 | int offset, size, check_rip, offset_max, throttle = 0; |
b7074f1f | 588 | unsigned int msr; |
c9574fe0 | 589 | u64 *buf, *config, period; |
b7074f1f | 590 | |
4db2e8e6 | 591 | if (!test_bit(IBS_STARTED, pcpu->state)) { |
5a50f529 | 592 | fail: |
fc5fb2b5 RR |
593 | /* |
594 | * Catch spurious interrupts after stopping IBS: After | |
d82603c6 | 595 | * disabling IBS there could be still incoming NMIs |
fc5fb2b5 RR |
596 | * with samples that even have the valid bit cleared. |
597 | * Mark all this NMIs as handled. | |
598 | */ | |
85dc6002 | 599 | if (test_and_clear_bit(IBS_STOPPED, pcpu->state)) |
5a50f529 PZ |
600 | return 1; |
601 | ||
602 | return 0; | |
4db2e8e6 RR |
603 | } |
604 | ||
b7074f1f RR |
605 | msr = hwc->config_base; |
606 | buf = ibs_data.regs; | |
607 | rdmsrl(msr, *buf); | |
608 | if (!(*buf++ & perf_ibs->valid_mask)) | |
5a50f529 | 609 | goto fail; |
b7074f1f | 610 | |
c9574fe0 | 611 | config = &ibs_data.regs[0]; |
c75841a3 | 612 | perf_ibs_event_update(perf_ibs, event, config); |
fd0d000b | 613 | perf_sample_data_init(&data, 0, hwc->last_period); |
c9574fe0 | 614 | if (!perf_ibs_set_period(perf_ibs, hwc, &period)) |
d47e8238 RR |
615 | goto out; /* no sw counter overflow */ |
616 | ||
617 | ibs_data.caps = ibs_caps; | |
618 | size = 1; | |
619 | offset = 1; | |
620 | check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK)); | |
621 | if (event->attr.sample_type & PERF_SAMPLE_RAW) | |
622 | offset_max = perf_ibs->offset_max; | |
623 | else if (check_rip) | |
624 | offset_max = 2; | |
625 | else | |
626 | offset_max = 1; | |
627 | do { | |
628 | rdmsrl(msr + offset, *buf++); | |
629 | size++; | |
630 | offset = find_next_bit(perf_ibs->offset_mask, | |
631 | perf_ibs->offset_max, | |
632 | offset + 1); | |
633 | } while (offset < offset_max); | |
904cb367 AG |
634 | if (event->attr.sample_type & PERF_SAMPLE_RAW) { |
635 | /* | |
636 | * Read IbsBrTarget and IbsOpData4 separately | |
637 | * depending on their availability. | |
638 | * Can't add to offset_max as they are staggered | |
639 | */ | |
640 | if (ibs_caps & IBS_CAPS_BRNTRGT) { | |
641 | rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); | |
642 | size++; | |
643 | } | |
644 | if (ibs_caps & IBS_CAPS_OPDATA4) { | |
645 | rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++); | |
646 | size++; | |
647 | } | |
648 | } | |
d47e8238 RR |
649 | ibs_data.size = sizeof(u64) * size; |
650 | ||
651 | regs = *iregs; | |
450bbd49 RR |
652 | if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { |
653 | regs.flags &= ~PERF_EFLAGS_EXACT; | |
654 | } else { | |
d07bdfd3 | 655 | set_linear_ip(®s, ibs_data.regs[1]); |
450bbd49 RR |
656 | regs.flags |= PERF_EFLAGS_EXACT; |
657 | } | |
c75841a3 | 658 | |
b7074f1f | 659 | if (event->attr.sample_type & PERF_SAMPLE_RAW) { |
7e3f977e DB |
660 | raw = (struct perf_raw_record){ |
661 | .frag = { | |
662 | .size = sizeof(u32) + ibs_data.size, | |
663 | .data = ibs_data.data, | |
664 | }, | |
665 | }; | |
b7074f1f RR |
666 | data.raw = &raw; |
667 | } | |
668 | ||
d47e8238 RR |
669 | throttle = perf_event_overflow(event, &data, ®s); |
670 | out: | |
c9574fe0 | 671 | if (throttle) |
0158b83f | 672 | perf_ibs_stop(event, 0); |
c9574fe0 RR |
673 | else |
674 | perf_ibs_enable_event(perf_ibs, hwc, period >> 4); | |
db98c5fa RR |
675 | |
676 | perf_event_update_userpage(event); | |
b7074f1f RR |
677 | |
678 | return 1; | |
679 | } | |
680 | ||
9326638c | 681 | static int |
b7074f1f RR |
682 | perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) |
683 | { | |
c2872d38 | 684 | u64 stamp = sched_clock(); |
b7074f1f RR |
685 | int handled = 0; |
686 | ||
687 | handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs); | |
688 | handled += perf_ibs_handle_irq(&perf_ibs_op, regs); | |
689 | ||
690 | if (handled) | |
691 | inc_irq_stat(apic_perf_irqs); | |
692 | ||
c2872d38 PZ |
693 | perf_sample_event_took(sched_clock() - stamp); |
694 | ||
b7074f1f RR |
695 | return handled; |
696 | } | |
9326638c | 697 | NOKPROBE_SYMBOL(perf_ibs_nmi_handler); |
b7074f1f | 698 | |
4db2e8e6 RR |
699 | static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) |
700 | { | |
701 | struct cpu_perf_ibs __percpu *pcpu; | |
702 | int ret; | |
703 | ||
704 | pcpu = alloc_percpu(struct cpu_perf_ibs); | |
705 | if (!pcpu) | |
706 | return -ENOMEM; | |
707 | ||
708 | perf_ibs->pcpu = pcpu; | |
709 | ||
2e132b12 RR |
710 | /* register attributes */ |
711 | if (perf_ibs->format_attrs[0]) { | |
712 | memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group)); | |
713 | perf_ibs->format_group.name = "format"; | |
714 | perf_ibs->format_group.attrs = perf_ibs->format_attrs; | |
715 | ||
716 | memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups)); | |
717 | perf_ibs->attr_groups[0] = &perf_ibs->format_group; | |
718 | perf_ibs->pmu.attr_groups = perf_ibs->attr_groups; | |
719 | } | |
720 | ||
4db2e8e6 RR |
721 | ret = perf_pmu_register(&perf_ibs->pmu, name, -1); |
722 | if (ret) { | |
723 | perf_ibs->pcpu = NULL; | |
724 | free_percpu(pcpu); | |
725 | } | |
726 | ||
727 | return ret; | |
728 | } | |
729 | ||
9744f7b7 | 730 | static __init void perf_event_ibs_init(void) |
b7169166 | 731 | { |
2e132b12 RR |
732 | struct attribute **attr = ibs_op_format_attrs; |
733 | ||
4db2e8e6 | 734 | perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); |
2e132b12 RR |
735 | |
736 | if (ibs_caps & IBS_CAPS_OPCNT) { | |
7bf35238 | 737 | perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; |
2e132b12 RR |
738 | *attr++ = &format_attr_cnt_ctl.attr; |
739 | } | |
4db2e8e6 | 740 | perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); |
2e132b12 | 741 | |
fab06992 | 742 | register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); |
1b74dde7 | 743 | pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps); |
b7169166 RR |
744 | } |
745 | ||
746 | #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ | |
747 | ||
9744f7b7 | 748 | static __init void perf_event_ibs_init(void) { } |
b7169166 RR |
749 | |
750 | #endif | |
751 | ||
752 | /* IBS - apic initialization, for perf and oprofile */ | |
753 | ||
754 | static __init u32 __get_ibs_caps(void) | |
755 | { | |
756 | u32 caps; | |
757 | unsigned int max_level; | |
758 | ||
759 | if (!boot_cpu_has(X86_FEATURE_IBS)) | |
760 | return 0; | |
761 | ||
762 | /* check IBS cpuid feature flags */ | |
763 | max_level = cpuid_eax(0x80000000); | |
764 | if (max_level < IBS_CPUID_FEATURES) | |
765 | return IBS_CAPS_DEFAULT; | |
766 | ||
767 | caps = cpuid_eax(IBS_CPUID_FEATURES); | |
768 | if (!(caps & IBS_CAPS_AVAIL)) | |
769 | /* cpuid flags not valid */ | |
770 | return IBS_CAPS_DEFAULT; | |
771 | ||
772 | return caps; | |
773 | } | |
774 | ||
775 | u32 get_ibs_caps(void) | |
776 | { | |
777 | return ibs_caps; | |
778 | } | |
779 | ||
780 | EXPORT_SYMBOL(get_ibs_caps); | |
781 | ||
782 | static inline int get_eilvt(int offset) | |
783 | { | |
784 | return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); | |
785 | } | |
786 | ||
/*
 * Reset EILVT entry @offset (vector 0, message type 0, masked).
 * Returns 1 if setup_APIC_eilvt() succeeded, 0 otherwise.
 */
static inline int put_eilvt(int offset)
{
	return setup_APIC_eilvt(offset, 0, 0, 1) == 0;
}
791 | ||
792 | /* | |
793 | * Check and reserve APIC extended interrupt LVT offset for IBS if available. | |
794 | */ | |
795 | static inline int ibs_eilvt_valid(void) | |
796 | { | |
797 | int offset; | |
798 | u64 val; | |
799 | int valid = 0; | |
800 | ||
801 | preempt_disable(); | |
802 | ||
803 | rdmsrl(MSR_AMD64_IBSCTL, val); | |
804 | offset = val & IBSCTL_LVT_OFFSET_MASK; | |
805 | ||
806 | if (!(val & IBSCTL_LVT_OFFSET_VALID)) { | |
807 | pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", | |
808 | smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); | |
809 | goto out; | |
810 | } | |
811 | ||
812 | if (!get_eilvt(offset)) { | |
813 | pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", | |
814 | smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); | |
815 | goto out; | |
816 | } | |
817 | ||
818 | valid = 1; | |
819 | out: | |
820 | preempt_enable(); | |
821 | ||
822 | return valid; | |
823 | } | |
824 | ||
825 | static int setup_ibs_ctl(int ibs_eilvt_off) | |
826 | { | |
827 | struct pci_dev *cpu_cfg; | |
828 | int nodes; | |
829 | u32 value = 0; | |
830 | ||
831 | nodes = 0; | |
832 | cpu_cfg = NULL; | |
833 | do { | |
834 | cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, | |
835 | PCI_DEVICE_ID_AMD_10H_NB_MISC, | |
836 | cpu_cfg); | |
837 | if (!cpu_cfg) | |
838 | break; | |
839 | ++nodes; | |
840 | pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off | |
841 | | IBSCTL_LVT_OFFSET_VALID); | |
842 | pci_read_config_dword(cpu_cfg, IBSCTL, &value); | |
843 | if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { | |
844 | pci_dev_put(cpu_cfg); | |
1b74dde7 CY |
845 | pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n", |
846 | value); | |
b7169166 RR |
847 | return -EINVAL; |
848 | } | |
849 | } while (1); | |
850 | ||
851 | if (!nodes) { | |
1b74dde7 | 852 | pr_debug("No CPU node configured for IBS\n"); |
b7169166 RR |
853 | return -ENODEV; |
854 | } | |
855 | ||
856 | return 0; | |
857 | } | |
858 | ||
859 | /* | |
860 | * This runs only on the current cpu. We try to find an LVT offset and | |
861 | * setup the local APIC. For this we must disable preemption. On | |
862 | * success we initialize all nodes with this offset. This updates then | |
863 | * the offset in the IBS_CTL per-node msr. The per-core APIC setup of | |
864 | * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that | |
865 | * is using the new offset. | |
866 | */ | |
c796b205 | 867 | static void force_ibs_eilvt_setup(void) |
b7169166 RR |
868 | { |
869 | int offset; | |
870 | int ret; | |
871 | ||
872 | preempt_disable(); | |
873 | /* find the next free available EILVT entry, skip offset 0 */ | |
874 | for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { | |
875 | if (get_eilvt(offset)) | |
876 | break; | |
877 | } | |
878 | preempt_enable(); | |
879 | ||
880 | if (offset == APIC_EILVT_NR_MAX) { | |
1b74dde7 | 881 | pr_debug("No EILVT entry available\n"); |
c796b205 | 882 | return; |
b7169166 RR |
883 | } |
884 | ||
885 | ret = setup_ibs_ctl(offset); | |
886 | if (ret) | |
887 | goto out; | |
888 | ||
c796b205 | 889 | if (!ibs_eilvt_valid()) |
b7169166 | 890 | goto out; |
b7169166 | 891 | |
16e5294e | 892 | pr_info("IBS: LVT offset %d assigned\n", offset); |
b7169166 | 893 | |
c796b205 | 894 | return; |
b7169166 RR |
895 | out: |
896 | preempt_disable(); | |
897 | put_eilvt(offset); | |
898 | preempt_enable(); | |
c796b205 | 899 | return; |
b7169166 RR |
900 | } |
901 | ||
bee09ed9 RR |
902 | static void ibs_eilvt_setup(void) |
903 | { | |
904 | /* | |
905 | * Force LVT offset assignment for family 10h: The offsets are | |
906 | * not assigned by the BIOS for this family, so the OS is | |
907 | * responsible for doing it. If the OS assignment fails, fall | |
908 | * back to BIOS settings and try to setup this. | |
909 | */ | |
910 | if (boot_cpu_data.x86 == 0x10) | |
911 | force_ibs_eilvt_setup(); | |
912 | } | |
913 | ||
b7169166 RR |
914 | static inline int get_ibs_lvt_offset(void) |
915 | { | |
916 | u64 val; | |
917 | ||
918 | rdmsrl(MSR_AMD64_IBSCTL, val); | |
919 | if (!(val & IBSCTL_LVT_OFFSET_VALID)) | |
920 | return -EINVAL; | |
921 | ||
922 | return val & IBSCTL_LVT_OFFSET_MASK; | |
923 | } | |
924 | ||
9744f7b7 | 925 | static void setup_APIC_ibs(void) |
b7169166 RR |
926 | { |
927 | int offset; | |
928 | ||
929 | offset = get_ibs_lvt_offset(); | |
930 | if (offset < 0) | |
931 | goto failed; | |
932 | ||
933 | if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) | |
934 | return; | |
935 | failed: | |
936 | pr_warn("perf: IBS APIC setup failed on cpu #%d\n", | |
937 | smp_processor_id()); | |
938 | } | |
939 | ||
9744f7b7 | 940 | static void clear_APIC_ibs(void) |
b7169166 RR |
941 | { |
942 | int offset; | |
943 | ||
944 | offset = get_ibs_lvt_offset(); | |
945 | if (offset >= 0) | |
946 | setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); | |
947 | } | |
948 | ||
9744f7b7 TG |
/*
 * "starting" callback registered with cpuhp_setup_state() in
 * amd_ibs_init(): sets up the IBS APIC entry via setup_APIC_ibs().
 * @cpu is not used. Always returns 0.
 */
static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
{
	setup_APIC_ibs();

	return 0;
}
954 | ||
bee09ed9 RR |
955 | #ifdef CONFIG_PM |
956 | ||
/*
 * Syscore suspend hook: clear the IBS APIC entry through
 * clear_APIC_ibs(). Always returns 0.
 */
static int perf_ibs_suspend(void)
{
	clear_APIC_ibs();

	return 0;
}
962 | ||
/*
 * Syscore resume hook: redo the EILVT offset setup first, then program
 * the IBS APIC entry again with setup_APIC_ibs().
 */
static void perf_ibs_resume(void)
{
	ibs_eilvt_setup();

	setup_APIC_ibs();
}
968 | ||
969 | static struct syscore_ops perf_ibs_syscore_ops = { | |
970 | .resume = perf_ibs_resume, | |
971 | .suspend = perf_ibs_suspend, | |
972 | }; | |
973 | ||
974 | static void perf_ibs_pm_init(void) | |
975 | { | |
976 | register_syscore_ops(&perf_ibs_syscore_ops); | |
977 | } | |
978 | ||
979 | #else | |
980 | ||
/* !CONFIG_PM variant: there are no syscore hooks to register. */
static inline void perf_ibs_pm_init(void)
{
}
982 | ||
983 | #endif | |
984 | ||
/*
 * Teardown callback registered with cpuhp_setup_state() in
 * amd_ibs_init(): clears the IBS APIC entry via clear_APIC_ibs().
 * @cpu is not used. Always returns 0.
 */
static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu)
{
	clear_APIC_ibs();

	return 0;
}
990 | ||
991 | static __init int amd_ibs_init(void) | |
992 | { | |
993 | u32 caps; | |
b7169166 RR |
994 | |
995 | caps = __get_ibs_caps(); | |
996 | if (!caps) | |
997 | return -ENODEV; /* ibs not supported by the cpu */ | |
998 | ||
bee09ed9 | 999 | ibs_eilvt_setup(); |
16e5294e RR |
1000 | |
1001 | if (!ibs_eilvt_valid()) | |
9744f7b7 | 1002 | return -EINVAL; |
b7169166 | 1003 | |
bee09ed9 | 1004 | perf_ibs_pm_init(); |
9744f7b7 | 1005 | |
b7169166 RR |
1006 | ibs_caps = caps; |
1007 | /* make ibs_caps visible to other cpus: */ | |
1008 | smp_mb(); | |
9744f7b7 TG |
1009 | /* |
1010 | * x86_pmu_amd_ibs_starting_cpu will be called from core on | |
1011 | * all online cpus. | |
1012 | */ | |
1013 | cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING, | |
7e164ce4 | 1014 | "perf/x86/amd/ibs:starting", |
9744f7b7 TG |
1015 | x86_pmu_amd_ibs_starting_cpu, |
1016 | x86_pmu_amd_ibs_dying_cpu); | |
b7169166 | 1017 | |
9744f7b7 TG |
1018 | perf_event_ibs_init(); |
1019 | ||
1020 | return 0; | |
b7169166 RR |
1021 | } |
1022 | ||
/* IBS init needs the pci subsystem, so it cannot be done any earlier: */
device_initcall(amd_ibs_init);