/*
 * Perf support for the Statistical Profiling Extension, introduced as
 * part of ARMv8.2.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) 2016 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 */

#define PMUNAME		"arm_spe"
#define DRVNAME		PMUNAME "_pmu"
#define pr_fmt(fmt)	DRVNAME ": " fmt

#include <linux/cpuhotplug.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

#include <asm/sysreg.h>

#define ARM_SPE_BUF_PAD_BYTE	0

struct arm_spe_pmu_buf {
	int nr_pages;
	bool snapshot;
	void *base;
};

struct arm_spe_pmu {
	struct pmu pmu;
	struct platform_device *pdev;
	cpumask_t supported_cpus;
	struct hlist_node hotplug_node;

	int irq; /* PPI */

	u16 min_period;
	u16 counter_sz;

#define SPE_PMU_FEAT_FILT_EVT		(1UL << 0)
#define SPE_PMU_FEAT_FILT_TYP		(1UL << 1)
#define SPE_PMU_FEAT_FILT_LAT		(1UL << 2)
#define SPE_PMU_FEAT_ARCH_INST		(1UL << 3)
#define SPE_PMU_FEAT_LDS		(1UL << 4)
#define SPE_PMU_FEAT_ERND		(1UL << 5)
#define SPE_PMU_FEAT_DEV_PROBED		(1UL << 63)
	u64 features;

	u16 max_record_sz;
	u16 align;
	struct perf_output_handle __percpu *handle;
};

#define to_spe_pmu(p) (container_of(p, struct arm_spe_pmu, pmu))

/* Convert a free-running index from perf into an SPE buffer offset */
#define PERF_IDX2OFF(idx, buf)	((idx) % ((buf)->nr_pages << PAGE_SHIFT))

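/*
 * Illustrative example (not from the original source; assumes 4KiB
 * pages): with an 8-page AUX buffer, nr_pages << PAGE_SHIFT is 32768,
 * so a free-running index of 0x8123 wraps to buffer offset 0x123.
 */
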
/* Keep track of our dynamic hotplug state */
static enum cpuhp_state arm_spe_pmu_online;

enum arm_spe_pmu_buf_fault_action {
	SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
	SPE_PMU_BUF_FAULT_ACT_FATAL,
	SPE_PMU_BUF_FAULT_ACT_OK,
};

/* This sysfs gunk was really good fun to write. */
enum arm_spe_pmu_capabilities {
	SPE_PMU_CAP_ARCH_INST = 0,
	SPE_PMU_CAP_ERND,
	SPE_PMU_CAP_FEAT_MAX,
	SPE_PMU_CAP_CNT_SZ = SPE_PMU_CAP_FEAT_MAX,
	SPE_PMU_CAP_MIN_IVAL,
};

static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = {
	[SPE_PMU_CAP_ARCH_INST]	= SPE_PMU_FEAT_ARCH_INST,
	[SPE_PMU_CAP_ERND]	= SPE_PMU_FEAT_ERND,
};

static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
{
	if (cap < SPE_PMU_CAP_FEAT_MAX)
		return !!(spe_pmu->features & arm_spe_pmu_feat_caps[cap]);

	switch (cap) {
	case SPE_PMU_CAP_CNT_SZ:
		return spe_pmu->counter_sz;
	case SPE_PMU_CAP_MIN_IVAL:
		return spe_pmu->min_period;
	default:
		WARN(1, "unknown cap %d\n", cap);
	}

	return 0;
}

static ssize_t arm_spe_pmu_cap_show(struct device *dev,
				    struct device_attribute *attr,
				    char *buf)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);
	struct dev_ext_attribute *ea =
		container_of(attr, struct dev_ext_attribute, attr);
	int cap = (long)ea->var;

	return snprintf(buf, PAGE_SIZE, "%u\n",
			arm_spe_pmu_cap_get(spe_pmu, cap));
}

#define SPE_EXT_ATTR_ENTRY(_name, _func, _var)				\
	&((struct dev_ext_attribute[]) {				\
		{ __ATTR(_name, S_IRUGO, _func, NULL), (void *)_var }	\
	})[0].attr.attr

#define SPE_CAP_EXT_ATTR_ENTRY(_name, _var)				\
	SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show, _var)

static struct attribute *arm_spe_pmu_cap_attr[] = {
	SPE_CAP_EXT_ATTR_ENTRY(arch_inst, SPE_PMU_CAP_ARCH_INST),
	SPE_CAP_EXT_ATTR_ENTRY(ernd, SPE_PMU_CAP_ERND),
	SPE_CAP_EXT_ATTR_ENTRY(count_size, SPE_PMU_CAP_CNT_SZ),
	SPE_CAP_EXT_ATTR_ENTRY(min_interval, SPE_PMU_CAP_MIN_IVAL),
	NULL,
};

static struct attribute_group arm_spe_pmu_cap_group = {
	.name = "caps",
	.attrs = arm_spe_pmu_cap_attr,
};

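/*
 * Illustrative sysfs layout (hypothetical instance name "arm_spe_0";
 * the suffix comes from pmu_idx at registration time):
 *
 *   /sys/bus/event_source/devices/arm_spe_0/caps/arch_inst
 *   /sys/bus/event_source/devices/arm_spe_0/caps/ernd
 *   /sys/bus/event_source/devices/arm_spe_0/caps/count_size
 *   /sys/bus/event_source/devices/arm_spe_0/caps/min_interval
 */
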
/* User ABI */
#define ATTR_CFG_FLD_ts_enable_CFG		config	/* PMSCR_EL1.TS */
#define ATTR_CFG_FLD_ts_enable_LO		0
#define ATTR_CFG_FLD_ts_enable_HI		0
#define ATTR_CFG_FLD_pa_enable_CFG		config	/* PMSCR_EL1.PA */
#define ATTR_CFG_FLD_pa_enable_LO		1
#define ATTR_CFG_FLD_pa_enable_HI		1
#define ATTR_CFG_FLD_pct_enable_CFG		config	/* PMSCR_EL1.PCT */
#define ATTR_CFG_FLD_pct_enable_LO		2
#define ATTR_CFG_FLD_pct_enable_HI		2
#define ATTR_CFG_FLD_jitter_CFG			config	/* PMSIRR_EL1.RND */
#define ATTR_CFG_FLD_jitter_LO			16
#define ATTR_CFG_FLD_jitter_HI			16
#define ATTR_CFG_FLD_branch_filter_CFG		config	/* PMSFCR_EL1.B */
#define ATTR_CFG_FLD_branch_filter_LO		32
#define ATTR_CFG_FLD_branch_filter_HI		32
#define ATTR_CFG_FLD_load_filter_CFG		config	/* PMSFCR_EL1.LD */
#define ATTR_CFG_FLD_load_filter_LO		33
#define ATTR_CFG_FLD_load_filter_HI		33
#define ATTR_CFG_FLD_store_filter_CFG		config	/* PMSFCR_EL1.ST */
#define ATTR_CFG_FLD_store_filter_LO		34
#define ATTR_CFG_FLD_store_filter_HI		34

#define ATTR_CFG_FLD_event_filter_CFG		config1	/* PMSEVFR_EL1 */
#define ATTR_CFG_FLD_event_filter_LO		0
#define ATTR_CFG_FLD_event_filter_HI		63

#define ATTR_CFG_FLD_min_latency_CFG		config2	/* PMSLATFR_EL1.MINLAT */
#define ATTR_CFG_FLD_min_latency_LO		0
#define ATTR_CFG_FLD_min_latency_HI		11

/* Why does everything I do descend into this? */
#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi)				\
	(lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi

#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi)				\
	__GEN_PMU_FORMAT_ATTR(cfg, lo, hi)

#define GEN_PMU_FORMAT_ATTR(name)					\
	PMU_FORMAT_ATTR(name,						\
			_GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG,	\
					     ATTR_CFG_FLD_##name##_LO,	\
					     ATTR_CFG_FLD_##name##_HI))

#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi)				\
	((((attr)->cfg) >> lo) & GENMASK(hi - lo, 0))

#define ATTR_CFG_GET_FLD(attr, name)					\
	_ATTR_CFG_GET_FLD(attr,						\
			  ATTR_CFG_FLD_##name##_CFG,			\
			  ATTR_CFG_FLD_##name##_LO,			\
			  ATTR_CFG_FLD_##name##_HI)

GEN_PMU_FORMAT_ATTR(ts_enable);
GEN_PMU_FORMAT_ATTR(pa_enable);
GEN_PMU_FORMAT_ATTR(pct_enable);
GEN_PMU_FORMAT_ATTR(jitter);
GEN_PMU_FORMAT_ATTR(branch_filter);
GEN_PMU_FORMAT_ATTR(load_filter);
GEN_PMU_FORMAT_ATTR(store_filter);
GEN_PMU_FORMAT_ATTR(event_filter);
GEN_PMU_FORMAT_ATTR(min_latency);

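/*
 * Illustrative expansion (not generated output from this file): the
 * invocations above publish sysfs "format" strings such as "config:0"
 * for ts_enable, "config1:0-63" for event_filter and "config2:0-11"
 * for min_latency, per the _CFG/_LO/_HI definitions.
 */
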
static struct attribute *arm_spe_pmu_formats_attr[] = {
	&format_attr_ts_enable.attr,
	&format_attr_pa_enable.attr,
	&format_attr_pct_enable.attr,
	&format_attr_jitter.attr,
	&format_attr_branch_filter.attr,
	&format_attr_load_filter.attr,
	&format_attr_store_filter.attr,
	&format_attr_event_filter.attr,
	&format_attr_min_latency.attr,
	NULL,
};

static struct attribute_group arm_spe_pmu_format_group = {
	.name = "format",
	.attrs = arm_spe_pmu_formats_attr,
};

static ssize_t arm_spe_pmu_get_attr_cpumask(struct device *dev,
					    struct device_attribute *attr,
					    char *buf)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);

	return cpumap_print_to_pagebuf(true, buf, &spe_pmu->supported_cpus);
}
static DEVICE_ATTR(cpumask, S_IRUGO, arm_spe_pmu_get_attr_cpumask, NULL);

static struct attribute *arm_spe_pmu_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group arm_spe_pmu_group = {
	.attrs = arm_spe_pmu_attrs,
};

static const struct attribute_group *arm_spe_pmu_attr_groups[] = {
	&arm_spe_pmu_group,
	&arm_spe_pmu_cap_group,
	&arm_spe_pmu_format_group,
	NULL,
};

/* Convert between user ABI and register values */
static u64 arm_spe_event_to_pmscr(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	u64 reg = 0;

	reg |= ATTR_CFG_GET_FLD(attr, ts_enable) << SYS_PMSCR_EL1_TS_SHIFT;
	reg |= ATTR_CFG_GET_FLD(attr, pa_enable) << SYS_PMSCR_EL1_PA_SHIFT;
	reg |= ATTR_CFG_GET_FLD(attr, pct_enable) << SYS_PMSCR_EL1_PCT_SHIFT;

	if (!attr->exclude_user)
		reg |= BIT(SYS_PMSCR_EL1_E0SPE_SHIFT);

	if (!attr->exclude_kernel)
		reg |= BIT(SYS_PMSCR_EL1_E1SPE_SHIFT);

	if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && capable(CAP_SYS_ADMIN))
		reg |= BIT(SYS_PMSCR_EL1_CX_SHIFT);

	return reg;
}

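/*
 * Illustrative usage (hypothetical instance name): a session such as
 * perf record -e arm_spe_0/ts_enable=1,pa_enable=1/ -c 1024 -- <cmd>
 * sets config bits 0 and 1, which the helper above maps onto
 * PMSCR_EL1.{TS,PA}; -c supplies the sample period consumed below.
 */
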
static void arm_spe_event_sanitise_period(struct perf_event *event)
{
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
	u64 period = event->hw.sample_period;
	u64 max_period = SYS_PMSIRR_EL1_INTERVAL_MASK
			 << SYS_PMSIRR_EL1_INTERVAL_SHIFT;

	if (period < spe_pmu->min_period)
		period = spe_pmu->min_period;
	else if (period > max_period)
		period = max_period;
	else
		period &= max_period;

	event->hw.sample_period = period;
}

static u64 arm_spe_event_to_pmsirr(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	u64 reg = 0;

	arm_spe_event_sanitise_period(event);

	reg |= ATTR_CFG_GET_FLD(attr, jitter) << SYS_PMSIRR_EL1_RND_SHIFT;
	reg |= event->hw.sample_period;

	return reg;
}

static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	u64 reg = 0;

	reg |= ATTR_CFG_GET_FLD(attr, load_filter) << SYS_PMSFCR_EL1_LD_SHIFT;
	reg |= ATTR_CFG_GET_FLD(attr, store_filter) << SYS_PMSFCR_EL1_ST_SHIFT;
	reg |= ATTR_CFG_GET_FLD(attr, branch_filter) << SYS_PMSFCR_EL1_B_SHIFT;

	if (reg)
		reg |= BIT(SYS_PMSFCR_EL1_FT_SHIFT);

	if (ATTR_CFG_GET_FLD(attr, event_filter))
		reg |= BIT(SYS_PMSFCR_EL1_FE_SHIFT);

	if (ATTR_CFG_GET_FLD(attr, min_latency))
		reg |= BIT(SYS_PMSFCR_EL1_FL_SHIFT);

	return reg;
}

static u64 arm_spe_event_to_pmsevfr(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	return ATTR_CFG_GET_FLD(attr, event_filter);
}

static u64 arm_spe_event_to_pmslatfr(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	return ATTR_CFG_GET_FLD(attr, min_latency)
	       << SYS_PMSLATFR_EL1_MINLAT_SHIFT;
}

static void arm_spe_pmu_pad_buf(struct perf_output_handle *handle, int len)
{
	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
	u64 head = PERF_IDX2OFF(handle->head, buf);

	memset(buf->base + head, ARM_SPE_BUF_PAD_BYTE, len);
	if (!buf->snapshot)
		perf_aux_output_skip(handle, len);
}

static u64 arm_spe_pmu_next_snapshot_off(struct perf_output_handle *handle)
{
	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
	u64 head = PERF_IDX2OFF(handle->head, buf);
	u64 limit = buf->nr_pages * PAGE_SIZE;

	/*
	 * The trace format isn't parseable in reverse, so clamp
	 * the limit to half of the buffer size in snapshot mode
	 * so that the worst case is half a buffer of records, as
	 * opposed to a single record.
	 */
	if (head < limit >> 1)
		limit >>= 1;

	/*
	 * If we're within max_record_sz of the limit, we must
	 * pad, move the head index and recompute the limit.
	 */
	if (limit - head < spe_pmu->max_record_sz) {
		arm_spe_pmu_pad_buf(handle, limit - head);
		handle->head = PERF_IDX2OFF(limit, buf);
		limit = ((buf->nr_pages * PAGE_SIZE) >> 1) + handle->head;
	}

	return limit;
}

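/*
 * Worked example (illustrative, assuming 4KiB pages): with a 16-page
 * (64KiB) snapshot buffer, a head below 32KiB clamps the limit to
 * 32KiB, so the worst case on a wrap is half a buffer of
 * forwards-parseable records rather than a single record.
 */
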
static u64 __arm_spe_pmu_next_off(struct perf_output_handle *handle)
{
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
	const u64 bufsize = buf->nr_pages * PAGE_SIZE;
	u64 limit = bufsize;
	u64 head, tail, wakeup;

	/*
	 * The head can be misaligned for two reasons:
	 *
	 * 1. The hardware left PMBPTR pointing to the first byte after
	 *    a record when generating a buffer management event.
	 *
	 * 2. We used perf_aux_output_skip to consume handle->size bytes
	 *    and CIRC_SPACE was used to compute the size, which always
	 *    leaves one entry free.
	 *
	 * Deal with this by padding to the next alignment boundary and
	 * moving the head index. If we run out of buffer space, we'll
	 * reduce handle->size to zero and end up reporting truncation.
	 */
	head = PERF_IDX2OFF(handle->head, buf);
	if (!IS_ALIGNED(head, spe_pmu->align)) {
		unsigned long delta = roundup(head, spe_pmu->align) - head;

		delta = min(delta, handle->size);
		arm_spe_pmu_pad_buf(handle, delta);
		head = PERF_IDX2OFF(handle->head, buf);
	}

	/* If we've run out of free space, then nothing more to do */
	if (!handle->size)
		goto no_space;

	/* Compute the tail and wakeup indices now that we've aligned head */
	tail = PERF_IDX2OFF(handle->head + handle->size, buf);
	wakeup = PERF_IDX2OFF(handle->wakeup, buf);

	/*
	 * Avoid clobbering unconsumed data. We know we have space, so
	 * if we see head == tail we know that the buffer is empty. If
	 * head > tail, then there's nothing to clobber prior to
	 * wrapping.
	 */
	if (head < tail)
		limit = round_down(tail, PAGE_SIZE);

	/*
	 * Wakeup may be arbitrarily far into the future. If it's not in
	 * the current generation, either we'll wrap before hitting it,
	 * or it's in the past and has been handled already.
	 *
	 * If there's a wakeup before we wrap, arrange to be woken up by
	 * the page boundary following it. Keep the tail boundary if
	 * that's lower.
	 */
	if (handle->wakeup < (handle->head + handle->size) && head <= wakeup)
		limit = min(limit, round_up(wakeup, PAGE_SIZE));

	if (limit > head)
		return limit;

	arm_spe_pmu_pad_buf(handle, handle->size);
no_space:
	perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
	perf_aux_output_end(handle, 0);
	return 0;
}

static u64 arm_spe_pmu_next_off(struct perf_output_handle *handle)
{
	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
	u64 limit = __arm_spe_pmu_next_off(handle);
	u64 head = PERF_IDX2OFF(handle->head, buf);

	/*
	 * If the head has come too close to the end of the buffer,
	 * then pad to the end and recompute the limit.
	 */
	if (limit && (limit - head < spe_pmu->max_record_sz)) {
		arm_spe_pmu_pad_buf(handle, limit - head);
		limit = __arm_spe_pmu_next_off(handle);
	}

	return limit;
}

static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle,
					  struct perf_event *event)
{
	u64 base, limit;
	struct arm_spe_pmu_buf *buf;

	/* Start a new aux session */
	buf = perf_aux_output_begin(handle, event);
	if (!buf) {
		event->hw.state |= PERF_HES_STOPPED;
		/*
		 * We still need to clear the limit pointer, since the
		 * profiler might only be disabled by virtue of a fault.
		 */
		limit = 0;
		goto out_write_limit;
	}

	limit = buf->snapshot ? arm_spe_pmu_next_snapshot_off(handle)
			      : arm_spe_pmu_next_off(handle);
	if (limit)
		limit |= BIT(SYS_PMBLIMITR_EL1_E_SHIFT);

	limit += (u64)buf->base;
	base = (u64)buf->base + PERF_IDX2OFF(handle->head, buf);
	write_sysreg_s(base, SYS_PMBPTR_EL1);

out_write_limit:
	write_sysreg_s(limit, SYS_PMBLIMITR_EL1);
}

static void arm_spe_perf_aux_output_end(struct perf_output_handle *handle)
{
	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
	u64 offset, size;

	offset = read_sysreg_s(SYS_PMBPTR_EL1) - (u64)buf->base;
	size = offset - PERF_IDX2OFF(handle->head, buf);

	if (buf->snapshot)
		handle->head = offset;

	perf_aux_output_end(handle, size);
}

static void arm_spe_pmu_disable_and_drain_local(void)
{
	/* Disable profiling at EL0 and EL1 */
	write_sysreg_s(0, SYS_PMSCR_EL1);
	isb();

	/* Drain any buffered data */
	psb_csync();
	dsb(nsh);

	/* Disable the profiling buffer */
	write_sysreg_s(0, SYS_PMBLIMITR_EL1);
	isb();
}

/* IRQ handling */
static enum arm_spe_pmu_buf_fault_action
arm_spe_pmu_buf_get_fault_act(struct perf_output_handle *handle)
{
	const char *err_str;
	u64 pmbsr;
	enum arm_spe_pmu_buf_fault_action ret;

	/*
	 * Ensure new profiling data is visible to the CPU and any external
	 * aborts have been resolved.
	 */
	psb_csync();
	dsb(nsh);

	/* Ensure hardware updates to PMBPTR_EL1 are visible */
	isb();

	/* Service required? */
	pmbsr = read_sysreg_s(SYS_PMBSR_EL1);
	if (!(pmbsr & BIT(SYS_PMBSR_EL1_S_SHIFT)))
		return SPE_PMU_BUF_FAULT_ACT_SPURIOUS;

	/*
	 * If we've lost data, disable profiling and also set the PARTIAL
	 * flag to indicate that the last record is corrupted.
	 */
	if (pmbsr & BIT(SYS_PMBSR_EL1_DL_SHIFT))
		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED |
					     PERF_AUX_FLAG_PARTIAL);

	/* Report collisions to userspace so that it can up the period */
	if (pmbsr & BIT(SYS_PMBSR_EL1_COLL_SHIFT))
		perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION);

	/* We only expect buffer management events */
	switch (pmbsr & (SYS_PMBSR_EL1_EC_MASK << SYS_PMBSR_EL1_EC_SHIFT)) {
	case SYS_PMBSR_EL1_EC_BUF:
		/* Handled below */
		break;
	case SYS_PMBSR_EL1_EC_FAULT_S1:
	case SYS_PMBSR_EL1_EC_FAULT_S2:
		err_str = "Unexpected buffer fault";
		goto out_err;
	default:
		err_str = "Unknown error code";
		goto out_err;
	}

	/* Buffer management event */
	switch (pmbsr &
		(SYS_PMBSR_EL1_BUF_BSC_MASK << SYS_PMBSR_EL1_BUF_BSC_SHIFT)) {
	case SYS_PMBSR_EL1_BUF_BSC_FULL:
		ret = SPE_PMU_BUF_FAULT_ACT_OK;
		goto out_stop;
	default:
		err_str = "Unknown buffer status code";
	}

out_err:
	pr_err_ratelimited("%s on CPU %d [PMBSR=0x%016llx, PMBPTR=0x%016llx, PMBLIMITR=0x%016llx]\n",
			   err_str, smp_processor_id(), pmbsr,
			   read_sysreg_s(SYS_PMBPTR_EL1),
			   read_sysreg_s(SYS_PMBLIMITR_EL1));
	ret = SPE_PMU_BUF_FAULT_ACT_FATAL;

out_stop:
	arm_spe_perf_aux_output_end(handle);
	return ret;
}

static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
{
	struct perf_output_handle *handle = dev;
	struct perf_event *event = handle->event;
	enum arm_spe_pmu_buf_fault_action act;

	if (!perf_get_aux(handle))
		return IRQ_NONE;

	act = arm_spe_pmu_buf_get_fault_act(handle);
	if (act == SPE_PMU_BUF_FAULT_ACT_SPURIOUS)
		return IRQ_NONE;

	/*
	 * Ensure perf callbacks have completed, which may disable the
	 * profiling buffer in response to a TRUNCATION flag.
	 */
	irq_work_run();

	switch (act) {
	case SPE_PMU_BUF_FAULT_ACT_FATAL:
		/*
		 * If a fatal exception occurred then leaving the profiling
		 * buffer enabled is a recipe waiting to happen. Since
		 * fatal faults don't always imply truncation, make sure
		 * that the profiling buffer is disabled explicitly before
		 * clearing the syndrome register.
		 */
		arm_spe_pmu_disable_and_drain_local();
		break;
	case SPE_PMU_BUF_FAULT_ACT_OK:
		/*
		 * We handled the fault (the buffer was full), so resume
		 * profiling as long as we didn't detect truncation.
		 * PMBPTR might be misaligned, but we'll burn that bridge
		 * when we get to it.
		 */
		if (!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)) {
			arm_spe_perf_aux_output_begin(handle, event);
			isb();
		}
		break;
	case SPE_PMU_BUF_FAULT_ACT_SPURIOUS:
		/* We've seen you before, but GCC has the memory of a sieve. */
		break;
	}

	/* The buffer pointers are now sane, so resume profiling. */
	write_sysreg_s(0, SYS_PMBSR_EL1);
	return IRQ_HANDLED;
}

/* Perf callbacks */
static int arm_spe_pmu_event_init(struct perf_event *event)
{
	u64 reg;
	struct perf_event_attr *attr = &event->attr;
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);

	/* This is, of course, deeply driver-specific */
	if (attr->type != event->pmu->type)
		return -ENOENT;

	if (event->cpu >= 0 &&
	    !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
		return -ENOENT;

	if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0)
		return -EOPNOTSUPP;

	if (attr->exclude_idle)
		return -EOPNOTSUPP;

	/*
	 * Feedback-directed frequency throttling doesn't work when we
	 * have a buffer of samples. We'd need to manually count the
	 * samples in the buffer when it fills up and adjust the event
	 * count to reflect that. Instead, just force the user to specify
	 * a sample period.
	 */
	if (attr->freq)
		return -EINVAL;

	reg = arm_spe_event_to_pmsfcr(event);
	if ((reg & BIT(SYS_PMSFCR_EL1_FE_SHIFT)) &&
	    !(spe_pmu->features & SPE_PMU_FEAT_FILT_EVT))
		return -EOPNOTSUPP;

	if ((reg & BIT(SYS_PMSFCR_EL1_FT_SHIFT)) &&
	    !(spe_pmu->features & SPE_PMU_FEAT_FILT_TYP))
		return -EOPNOTSUPP;

	if ((reg & BIT(SYS_PMSFCR_EL1_FL_SHIFT)) &&
	    !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT))
		return -EOPNOTSUPP;

	reg = arm_spe_event_to_pmscr(event);
	if (!capable(CAP_SYS_ADMIN) &&
	    (reg & (BIT(SYS_PMSCR_EL1_PA_SHIFT) |
		    BIT(SYS_PMSCR_EL1_CX_SHIFT) |
		    BIT(SYS_PMSCR_EL1_PCT_SHIFT))))
		return -EACCES;

	return 0;
}

static void arm_spe_pmu_start(struct perf_event *event, int flags)
{
	u64 reg;
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	struct perf_output_handle *handle = this_cpu_ptr(spe_pmu->handle);

	hwc->state = 0;
	arm_spe_perf_aux_output_begin(handle, event);
	if (hwc->state)
		return;

	reg = arm_spe_event_to_pmsfcr(event);
	write_sysreg_s(reg, SYS_PMSFCR_EL1);

	reg = arm_spe_event_to_pmsevfr(event);
	write_sysreg_s(reg, SYS_PMSEVFR_EL1);

	reg = arm_spe_event_to_pmslatfr(event);
	write_sysreg_s(reg, SYS_PMSLATFR_EL1);

	if (flags & PERF_EF_RELOAD) {
		reg = arm_spe_event_to_pmsirr(event);
		write_sysreg_s(reg, SYS_PMSIRR_EL1);
		isb();
		reg = local64_read(&hwc->period_left);
		write_sysreg_s(reg, SYS_PMSICR_EL1);
	}

	reg = arm_spe_event_to_pmscr(event);
	isb();
	write_sysreg_s(reg, SYS_PMSCR_EL1);
}

static void arm_spe_pmu_stop(struct perf_event *event, int flags)
{
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	struct perf_output_handle *handle = this_cpu_ptr(spe_pmu->handle);

	/* If we're already stopped, then nothing to do */
	if (hwc->state & PERF_HES_STOPPED)
		return;

	/* Stop all trace generation */
	arm_spe_pmu_disable_and_drain_local();

	if (flags & PERF_EF_UPDATE) {
		/*
		 * If there's a fault pending then ensure we contain it
		 * to this buffer, since we might be on the context-switch
		 * path.
		 */
		if (perf_get_aux(handle)) {
			enum arm_spe_pmu_buf_fault_action act;

			act = arm_spe_pmu_buf_get_fault_act(handle);
			if (act == SPE_PMU_BUF_FAULT_ACT_SPURIOUS)
				arm_spe_perf_aux_output_end(handle);
			else
				write_sysreg_s(0, SYS_PMBSR_EL1);
		}

		/*
		 * This may also contain ECOUNT, but nobody else should
		 * be looking at period_left, since we forbid frequency
		 * based sampling.
		 */
		local64_set(&hwc->period_left, read_sysreg_s(SYS_PMSICR_EL1));
		hwc->state |= PERF_HES_UPTODATE;
	}

	hwc->state |= PERF_HES_STOPPED;
}

static int arm_spe_pmu_add(struct perf_event *event, int flags)
{
	int ret = 0;
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int cpu = event->cpu == -1 ? smp_processor_id() : event->cpu;

	if (!cpumask_test_cpu(cpu, &spe_pmu->supported_cpus))
		return -ENOENT;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (flags & PERF_EF_START) {
		arm_spe_pmu_start(event, PERF_EF_RELOAD);
		if (hwc->state & PERF_HES_STOPPED)
			ret = -EINVAL;
	}

	return ret;
}

static void arm_spe_pmu_del(struct perf_event *event, int flags)
{
	arm_spe_pmu_stop(event, PERF_EF_UPDATE);
}

static void arm_spe_pmu_read(struct perf_event *event)
{
}

static void *arm_spe_pmu_setup_aux(struct perf_event *event, void **pages,
				   int nr_pages, bool snapshot)
{
	int i, cpu = event->cpu;
	struct page **pglist;
	struct arm_spe_pmu_buf *buf;

	/* We need at least two pages for this to work. */
	if (nr_pages < 2)
		return NULL;

	/*
	 * We require an even number of pages for snapshot mode, so that
	 * we can effectively treat the buffer as consisting of two equal
	 * parts and give userspace a fighting chance of getting some
	 * useful data out of it.
	 */
	if (!nr_pages || (snapshot && (nr_pages & 1)))
		return NULL;

	if (cpu == -1)
		cpu = raw_smp_processor_id();

	buf = kzalloc_node(sizeof(*buf), GFP_KERNEL, cpu_to_node(cpu));
	if (!buf)
		return NULL;

	pglist = kcalloc(nr_pages, sizeof(*pglist), GFP_KERNEL);
	if (!pglist)
		goto out_free_buf;

	for (i = 0; i < nr_pages; ++i) {
		struct page *page = virt_to_page(pages[i]);

		if (PagePrivate(page)) {
			pr_warn("unexpected high-order page for auxbuf!\n");
			goto out_free_pglist;
		}

		pglist[i] = virt_to_page(pages[i]);
	}

	buf->base = vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!buf->base)
		goto out_free_pglist;

	buf->nr_pages = nr_pages;
	buf->snapshot = snapshot;

	kfree(pglist);
	return buf;

out_free_pglist:
	kfree(pglist);
out_free_buf:
	kfree(buf);
	return NULL;
}

static void arm_spe_pmu_free_aux(void *aux)
{
	struct arm_spe_pmu_buf *buf = aux;

	vunmap(buf->base);
	kfree(buf);
}

/* Initialisation and teardown functions */
static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu)
{
	static atomic_t pmu_idx = ATOMIC_INIT(-1);

	int idx;
	char *name;
	struct device *dev = &spe_pmu->pdev->dev;

	spe_pmu->pmu = (struct pmu) {
		.module = THIS_MODULE,
		.capabilities = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
		.attr_groups = arm_spe_pmu_attr_groups,
		/*
		 * We hitch a ride on the software context here, so that
		 * we can support per-task profiling (which is not possible
		 * with the invalid context as it doesn't get sched callbacks).
		 * This requires that userspace either uses a dummy event for
		 * perf_event_open, since the aux buffer is not setup until
		 * a subsequent mmap, or creates the profiling event in a
		 * disabled state and explicitly PERF_EVENT_IOC_ENABLEs it
		 * once the buffer has been created.
		 */
		.task_ctx_nr = perf_sw_context,
		.event_init = arm_spe_pmu_event_init,
		.add = arm_spe_pmu_add,
		.del = arm_spe_pmu_del,
		.start = arm_spe_pmu_start,
		.stop = arm_spe_pmu_stop,
		.read = arm_spe_pmu_read,
		.setup_aux = arm_spe_pmu_setup_aux,
		.free_aux = arm_spe_pmu_free_aux,
	};

	idx = atomic_inc_return(&pmu_idx);
	name = devm_kasprintf(dev, GFP_KERNEL, "%s_%d", PMUNAME, idx);
	if (!name) {
		dev_err(dev, "failed to allocate name for pmu %d\n", idx);
		return -ENOMEM;
	}

	return perf_pmu_register(&spe_pmu->pmu, name, -1);
}

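/*
 * Illustrative userspace sequence implied by the comment above
 * (hypothetical, not part of this driver): open the event with
 * attr.disabled = 1, mmap() the ring buffer and then the AUX area,
 * and finally ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) so the buffer
 * exists before the event first schedules in.
 */
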
static void arm_spe_pmu_perf_destroy(struct arm_spe_pmu *spe_pmu)
{
	perf_pmu_unregister(&spe_pmu->pmu);
}

static void __arm_spe_pmu_dev_probe(void *info)
{
	int fld;
	u64 reg;
	struct arm_spe_pmu *spe_pmu = info;
	struct device *dev = &spe_pmu->pdev->dev;

	fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64DFR0_EL1),
						   ID_AA64DFR0_PMSVER_SHIFT);
	if (!fld) {
		dev_err(dev,
			"unsupported ID_AA64DFR0_EL1.PMSVer [%d] on CPU %d\n",
			fld, smp_processor_id());
		return;
	}

	/* Read PMBIDR first to determine whether or not we have access */
	reg = read_sysreg_s(SYS_PMBIDR_EL1);
	if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) {
		dev_err(dev,
			"profiling buffer owned by higher exception level\n");
		return;
	}

	/* Minimum alignment. If it's out-of-range, then fail the probe */
	fld = reg >> SYS_PMBIDR_EL1_ALIGN_SHIFT & SYS_PMBIDR_EL1_ALIGN_MASK;
	spe_pmu->align = 1 << fld;
	if (spe_pmu->align > SZ_2K) {
		dev_err(dev, "unsupported PMBIDR.Align [%d] on CPU %d\n",
			fld, smp_processor_id());
		return;
	}

	/* It's now safe to read PMSIDR and figure out what we've got */
	reg = read_sysreg_s(SYS_PMSIDR_EL1);
	if (reg & BIT(SYS_PMSIDR_EL1_FE_SHIFT))
		spe_pmu->features |= SPE_PMU_FEAT_FILT_EVT;

	if (reg & BIT(SYS_PMSIDR_EL1_FT_SHIFT))
		spe_pmu->features |= SPE_PMU_FEAT_FILT_TYP;

	if (reg & BIT(SYS_PMSIDR_EL1_FL_SHIFT))
		spe_pmu->features |= SPE_PMU_FEAT_FILT_LAT;

	if (reg & BIT(SYS_PMSIDR_EL1_ARCHINST_SHIFT))
		spe_pmu->features |= SPE_PMU_FEAT_ARCH_INST;

	if (reg & BIT(SYS_PMSIDR_EL1_LDS_SHIFT))
		spe_pmu->features |= SPE_PMU_FEAT_LDS;

	if (reg & BIT(SYS_PMSIDR_EL1_ERND_SHIFT))
		spe_pmu->features |= SPE_PMU_FEAT_ERND;

	/* This field has a spaced out encoding, so just use a look-up */
	fld = reg >> SYS_PMSIDR_EL1_INTERVAL_SHIFT & SYS_PMSIDR_EL1_INTERVAL_MASK;
	switch (fld) {
	case 0:
		spe_pmu->min_period = 256;
		break;
	case 2:
		spe_pmu->min_period = 512;
		break;
	case 3:
		spe_pmu->min_period = 768;
		break;
	case 4:
		spe_pmu->min_period = 1024;
		break;
	case 5:
		spe_pmu->min_period = 1536;
		break;
	case 6:
		spe_pmu->min_period = 2048;
		break;
	case 7:
		spe_pmu->min_period = 3072;
		break;
	default:
		dev_warn(dev, "unknown PMSIDR_EL1.Interval [%d]; assuming 8\n",
			 fld);
		/* Fallthrough */
	case 8:
		spe_pmu->min_period = 4096;
	}

	/* Maximum record size. If it's out-of-range, then fail the probe */
	fld = reg >> SYS_PMSIDR_EL1_MAXSIZE_SHIFT & SYS_PMSIDR_EL1_MAXSIZE_MASK;
	spe_pmu->max_record_sz = 1 << fld;
	if (spe_pmu->max_record_sz > SZ_2K || spe_pmu->max_record_sz < 16) {
		dev_err(dev, "unsupported PMSIDR_EL1.MaxSize [%d] on CPU %d\n",
			fld, smp_processor_id());
		return;
	}

	fld = reg >> SYS_PMSIDR_EL1_COUNTSIZE_SHIFT & SYS_PMSIDR_EL1_COUNTSIZE_MASK;
	switch (fld) {
	default:
		dev_warn(dev, "unknown PMSIDR_EL1.CountSize [%d]; assuming 2\n",
			 fld);
		/* Fallthrough */
	case 2:
		spe_pmu->counter_sz = 12;
	}

	dev_info(dev,
		 "probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
		 cpumask_pr_args(&spe_pmu->supported_cpus),
		 spe_pmu->max_record_sz, spe_pmu->align, spe_pmu->features);

	spe_pmu->features |= SPE_PMU_FEAT_DEV_PROBED;
	return;
}

static void __arm_spe_pmu_reset_local(void)
{
	/*
	 * This is probably overkill, as we have no idea where we're
	 * draining any buffered data to...
	 */
	arm_spe_pmu_disable_and_drain_local();

	/* Reset the buffer base pointer */
	write_sysreg_s(0, SYS_PMBPTR_EL1);
	isb();

	/* Clear any pending management interrupts */
	write_sysreg_s(0, SYS_PMBSR_EL1);
	isb();
}

static void __arm_spe_pmu_setup_one(void *info)
{
	struct arm_spe_pmu *spe_pmu = info;

	__arm_spe_pmu_reset_local();
	enable_percpu_irq(spe_pmu->irq, IRQ_TYPE_NONE);
}

static void __arm_spe_pmu_stop_one(void *info)
{
	struct arm_spe_pmu *spe_pmu = info;

	disable_percpu_irq(spe_pmu->irq);
	__arm_spe_pmu_reset_local();
}

static int arm_spe_pmu_cpu_startup(unsigned int cpu, struct hlist_node *node)
{
	struct arm_spe_pmu *spe_pmu;

	spe_pmu = hlist_entry_safe(node, struct arm_spe_pmu, hotplug_node);
	if (!cpumask_test_cpu(cpu, &spe_pmu->supported_cpus))
		return 0;

	__arm_spe_pmu_setup_one(spe_pmu);
	return 0;
}

static int arm_spe_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
	struct arm_spe_pmu *spe_pmu;

	spe_pmu = hlist_entry_safe(node, struct arm_spe_pmu, hotplug_node);
	if (!cpumask_test_cpu(cpu, &spe_pmu->supported_cpus))
		return 0;

	__arm_spe_pmu_stop_one(spe_pmu);
	return 0;
}

static int arm_spe_pmu_dev_init(struct arm_spe_pmu *spe_pmu)
{
	int ret;
	cpumask_t *mask = &spe_pmu->supported_cpus;

	/* Make sure we probe the hardware on a relevant CPU */
	ret = smp_call_function_any(mask, __arm_spe_pmu_dev_probe, spe_pmu, 1);
	if (ret || !(spe_pmu->features & SPE_PMU_FEAT_DEV_PROBED))
		return -ENXIO;

	/* Request our PPIs (note that the IRQ is still disabled) */
	ret = request_percpu_irq(spe_pmu->irq, arm_spe_pmu_irq_handler, DRVNAME,
				 spe_pmu->handle);
	if (ret)
		return ret;

	/*
	 * Register our hotplug notifier now so we don't miss any events.
	 * This will enable the IRQ for any supported CPUs that are already
	 * up.
	 */
	ret = cpuhp_state_add_instance(arm_spe_pmu_online,
				       &spe_pmu->hotplug_node);
	if (ret)
		free_percpu_irq(spe_pmu->irq, spe_pmu->handle);

	return ret;
}

static void arm_spe_pmu_dev_teardown(struct arm_spe_pmu *spe_pmu)
{
	cpuhp_state_remove_instance(arm_spe_pmu_online, &spe_pmu->hotplug_node);
	free_percpu_irq(spe_pmu->irq, spe_pmu->handle);
}

/* Driver and device probing */
static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu)
{
	struct platform_device *pdev = spe_pmu->pdev;
	int irq = platform_get_irq(pdev, 0);

	if (irq < 0) {
		dev_err(&pdev->dev, "failed to get IRQ (%d)\n", irq);
		return -ENXIO;
	}

	if (!irq_is_percpu(irq)) {
		dev_err(&pdev->dev, "expected PPI but got SPI (%d)\n", irq);
		return -EINVAL;
	}

	if (irq_get_percpu_devid_partition(irq, &spe_pmu->supported_cpus)) {
		dev_err(&pdev->dev, "failed to get PPI partition (%d)\n", irq);
		return -EINVAL;
	}

	spe_pmu->irq = irq;
	return 0;
}

static const struct of_device_id arm_spe_pmu_of_match[] = {
	{ .compatible = "arm,statistical-profiling-extension-v1", .data = (void *)1 },
	{ /* Sentinel */ },
};

static int arm_spe_pmu_device_dt_probe(struct platform_device *pdev)
{
	int ret;
	struct arm_spe_pmu *spe_pmu;
	struct device *dev = &pdev->dev;

	/*
	 * If kernelspace is unmapped when running at EL0, then the SPE
	 * buffer will fault and prematurely terminate the AUX session.
	 */
	if (arm64_kernel_unmapped_at_el0()) {
		dev_warn_once(dev, "profiling buffer inaccessible. Try passing \"kpti=off\" on the kernel command line\n");
		return -EPERM;
	}

	spe_pmu = devm_kzalloc(dev, sizeof(*spe_pmu), GFP_KERNEL);
	if (!spe_pmu) {
		dev_err(dev, "failed to allocate spe_pmu\n");
		return -ENOMEM;
	}

	spe_pmu->handle = alloc_percpu(typeof(*spe_pmu->handle));
	if (!spe_pmu->handle)
		return -ENOMEM;

	spe_pmu->pdev = pdev;
	platform_set_drvdata(pdev, spe_pmu);

	ret = arm_spe_pmu_irq_probe(spe_pmu);
	if (ret)
		goto out_free_handle;

	ret = arm_spe_pmu_dev_init(spe_pmu);
	if (ret)
		goto out_free_handle;

	ret = arm_spe_pmu_perf_init(spe_pmu);
	if (ret)
		goto out_teardown_dev;

	return 0;

out_teardown_dev:
	arm_spe_pmu_dev_teardown(spe_pmu);
out_free_handle:
	free_percpu(spe_pmu->handle);
	return ret;
}

static int arm_spe_pmu_device_remove(struct platform_device *pdev)
{
	struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);

	arm_spe_pmu_perf_destroy(spe_pmu);
	arm_spe_pmu_dev_teardown(spe_pmu);
	free_percpu(spe_pmu->handle);
	return 0;
}

static struct platform_driver arm_spe_pmu_driver = {
	.driver = {
		.name = DRVNAME,
		.of_match_table = of_match_ptr(arm_spe_pmu_of_match),
	},
	.probe = arm_spe_pmu_device_dt_probe,
	.remove = arm_spe_pmu_device_remove,
};

static int __init arm_spe_pmu_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRVNAME,
				      arm_spe_pmu_cpu_startup,
				      arm_spe_pmu_cpu_teardown);
	if (ret < 0)
		return ret;
	arm_spe_pmu_online = ret;

	ret = platform_driver_register(&arm_spe_pmu_driver);
	if (ret)
		cpuhp_remove_multi_state(arm_spe_pmu_online);

	return ret;
}

static void __exit arm_spe_pmu_exit(void)
{
	platform_driver_unregister(&arm_spe_pmu_driver);
	cpuhp_remove_multi_state(arm_spe_pmu_online);
}

module_init(arm_spe_pmu_init);
module_exit(arm_spe_pmu_exit);

MODULE_DESCRIPTION("Perf driver for the ARMv8.2 Statistical Profiling Extension");
MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
MODULE_LICENSE("GPL v2");