// SPDX-License-Identifier: GPL-2.0-only
/*
 * Perf support for the Statistical Profiling Extension, introduced as
 * part of ARMv8.2.
 *
 * Copyright (C) 2016 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 */

#define PMUNAME		"arm_spe"
#define DRVNAME		PMUNAME "_pmu"
#define pr_fmt(fmt)	DRVNAME ": " fmt

#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/capability.h>
#include <linux/cpuhotplug.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/vmalloc.h>

#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/mmu.h>
#include <asm/sysreg.h>

#define ARM_SPE_BUF_PAD_BYTE	0

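/*
 * State for one perf AUX buffer session: the number of pages mapped by
 * perf, whether the session is in snapshot mode, and our own contiguous
 * (vmap'd) kernel mapping of those pages.
 */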
struct arm_spe_pmu_buf {
	int			nr_pages;
	bool			snapshot;
	void			*base;
};

struct arm_spe_pmu {
	struct pmu		pmu;
	struct platform_device	*pdev;
	cpumask_t		supported_cpus;
	struct hlist_node	hotplug_node;

	int			irq; /* PPI */
	u16			pmsver;
	u16			min_period;
	u16			counter_sz;

#define SPE_PMU_FEAT_FILT_EVT		(1UL << 0)
#define SPE_PMU_FEAT_FILT_TYP		(1UL << 1)
#define SPE_PMU_FEAT_FILT_LAT		(1UL << 2)
#define SPE_PMU_FEAT_ARCH_INST		(1UL << 3)
#define SPE_PMU_FEAT_LDS		(1UL << 4)
#define SPE_PMU_FEAT_ERND		(1UL << 5)
#define SPE_PMU_FEAT_DEV_PROBED		(1UL << 63)
	u64			features;

	u16			max_record_sz;
	u16			align;
	struct perf_output_handle __percpu	*handle;
};

#define to_spe_pmu(p) (container_of(p, struct arm_spe_pmu, pmu))

/* Convert a free-running index from perf into an SPE buffer offset */
#define PERF_IDX2OFF(idx, buf)	((idx) % ((buf)->nr_pages << PAGE_SHIFT))

/* Keep track of our dynamic hotplug state */
static enum cpuhp_state arm_spe_pmu_online;

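/*
 * Possible outcomes of servicing a profiling buffer fault: the interrupt
 * was spurious, the fault was fatal to the session, or the buffer simply
 * filled up and profiling can be restarted.
 */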
enum arm_spe_pmu_buf_fault_action {
	SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
	SPE_PMU_BUF_FAULT_ACT_FATAL,
	SPE_PMU_BUF_FAULT_ACT_OK,
};

/* This sysfs gunk was really good fun to write. */
enum arm_spe_pmu_capabilities {
	SPE_PMU_CAP_ARCH_INST = 0,
	SPE_PMU_CAP_ERND,
	SPE_PMU_CAP_FEAT_MAX,
	SPE_PMU_CAP_CNT_SZ = SPE_PMU_CAP_FEAT_MAX,
	SPE_PMU_CAP_MIN_IVAL,
};

static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = {
	[SPE_PMU_CAP_ARCH_INST]	= SPE_PMU_FEAT_ARCH_INST,
	[SPE_PMU_CAP_ERND]	= SPE_PMU_FEAT_ERND,
};

static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
{
	if (cap < SPE_PMU_CAP_FEAT_MAX)
		return !!(spe_pmu->features & arm_spe_pmu_feat_caps[cap]);

	switch (cap) {
	case SPE_PMU_CAP_CNT_SZ:
		return spe_pmu->counter_sz;
	case SPE_PMU_CAP_MIN_IVAL:
		return spe_pmu->min_period;
	default:
		WARN(1, "unknown cap %d\n", cap);
	}

	return 0;
}

static ssize_t arm_spe_pmu_cap_show(struct device *dev,
				    struct device_attribute *attr,
				    char *buf)
{
	struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev);
	struct dev_ext_attribute *ea =
		container_of(attr, struct dev_ext_attribute, attr);
	int cap = (long)ea->var;

	return snprintf(buf, PAGE_SIZE, "%u\n",
		arm_spe_pmu_cap_get(spe_pmu, cap));
}

#define SPE_EXT_ATTR_ENTRY(_name, _func, _var)				\
	&((struct dev_ext_attribute[]) {				\
		{ __ATTR(_name, S_IRUGO, _func, NULL), (void *)_var }	\
	})[0].attr.attr

#define SPE_CAP_EXT_ATTR_ENTRY(_name, _var)				\
	SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show, _var)

static struct attribute *arm_spe_pmu_cap_attr[] = {
	SPE_CAP_EXT_ATTR_ENTRY(arch_inst, SPE_PMU_CAP_ARCH_INST),
	SPE_CAP_EXT_ATTR_ENTRY(ernd, SPE_PMU_CAP_ERND),
	SPE_CAP_EXT_ATTR_ENTRY(count_size, SPE_PMU_CAP_CNT_SZ),
	SPE_CAP_EXT_ATTR_ENTRY(min_interval, SPE_PMU_CAP_MIN_IVAL),
	NULL,
};

static const struct attribute_group arm_spe_pmu_cap_group = {
	.name	= "caps",
	.attrs	= arm_spe_pmu_cap_attr,
};

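/*
 * The capabilities above show up in sysfs under the registered PMU, e.g.
 * /sys/bus/event_source/devices/arm_spe_0/caps/min_interval.
 */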
/* User ABI */
#define ATTR_CFG_FLD_ts_enable_CFG		config	/* PMSCR_EL1.TS */
#define ATTR_CFG_FLD_ts_enable_LO		0
#define ATTR_CFG_FLD_ts_enable_HI		0
#define ATTR_CFG_FLD_pa_enable_CFG		config	/* PMSCR_EL1.PA */
#define ATTR_CFG_FLD_pa_enable_LO		1
#define ATTR_CFG_FLD_pa_enable_HI		1
#define ATTR_CFG_FLD_pct_enable_CFG		config	/* PMSCR_EL1.PCT */
#define ATTR_CFG_FLD_pct_enable_LO		2
#define ATTR_CFG_FLD_pct_enable_HI		2
#define ATTR_CFG_FLD_jitter_CFG			config	/* PMSIRR_EL1.RND */
#define ATTR_CFG_FLD_jitter_LO			16
#define ATTR_CFG_FLD_jitter_HI			16
#define ATTR_CFG_FLD_branch_filter_CFG		config	/* PMSFCR_EL1.B */
#define ATTR_CFG_FLD_branch_filter_LO		32
#define ATTR_CFG_FLD_branch_filter_HI		32
#define ATTR_CFG_FLD_load_filter_CFG		config	/* PMSFCR_EL1.LD */
#define ATTR_CFG_FLD_load_filter_LO		33
#define ATTR_CFG_FLD_load_filter_HI		33
#define ATTR_CFG_FLD_store_filter_CFG		config	/* PMSFCR_EL1.ST */
#define ATTR_CFG_FLD_store_filter_LO		34
#define ATTR_CFG_FLD_store_filter_HI		34

#define ATTR_CFG_FLD_event_filter_CFG		config1	/* PMSEVFR_EL1 */
#define ATTR_CFG_FLD_event_filter_LO		0
#define ATTR_CFG_FLD_event_filter_HI		63

#define ATTR_CFG_FLD_min_latency_CFG		config2	/* PMSLATFR_EL1.MINLAT */
#define ATTR_CFG_FLD_min_latency_LO		0
#define ATTR_CFG_FLD_min_latency_HI		11

/* Why does everything I do descend into this? */
#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi)				\
	(lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi

#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi)				\
	__GEN_PMU_FORMAT_ATTR(cfg, lo, hi)

#define GEN_PMU_FORMAT_ATTR(name)					\
	PMU_FORMAT_ATTR(name,						\
	_GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG,			\
			     ATTR_CFG_FLD_##name##_LO,			\
			     ATTR_CFG_FLD_##name##_HI))

#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi)				\
	((((attr)->cfg) >> lo) & GENMASK(hi - lo, 0))

#define ATTR_CFG_GET_FLD(attr, name)					\
	_ATTR_CFG_GET_FLD(attr,						\
			  ATTR_CFG_FLD_##name##_CFG,			\
			  ATTR_CFG_FLD_##name##_LO,			\
			  ATTR_CFG_FLD_##name##_HI)

GEN_PMU_FORMAT_ATTR(ts_enable);
GEN_PMU_FORMAT_ATTR(pa_enable);
GEN_PMU_FORMAT_ATTR(pct_enable);
GEN_PMU_FORMAT_ATTR(jitter);
GEN_PMU_FORMAT_ATTR(branch_filter);
GEN_PMU_FORMAT_ATTR(load_filter);
GEN_PMU_FORMAT_ATTR(store_filter);
GEN_PMU_FORMAT_ATTR(event_filter);
GEN_PMU_FORMAT_ATTR(min_latency);

static struct attribute *arm_spe_pmu_formats_attr[] = {
	&format_attr_ts_enable.attr,
	&format_attr_pa_enable.attr,
	&format_attr_pct_enable.attr,
	&format_attr_jitter.attr,
	&format_attr_branch_filter.attr,
	&format_attr_load_filter.attr,
	&format_attr_store_filter.attr,
	&format_attr_event_filter.attr,
	&format_attr_min_latency.attr,
	NULL,
};

static const struct attribute_group arm_spe_pmu_format_group = {
	.name	= "format",
	.attrs	= arm_spe_pmu_formats_attr,
};

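/*
 * With the "format" group above, these fields can be set directly as perf
 * config terms, for example:
 *
 *	perf record -e arm_spe_0/ts_enable=1,load_filter=1,min_latency=64/ ...
 */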
static ssize_t arm_spe_pmu_get_attr_cpumask(struct device *dev,
					    struct device_attribute *attr,
					    char *buf)
{
	struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev);

	return cpumap_print_to_pagebuf(true, buf, &spe_pmu->supported_cpus);
}
static DEVICE_ATTR(cpumask, S_IRUGO, arm_spe_pmu_get_attr_cpumask, NULL);

static struct attribute *arm_spe_pmu_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group arm_spe_pmu_group = {
	.attrs	= arm_spe_pmu_attrs,
};

static const struct attribute_group *arm_spe_pmu_attr_groups[] = {
	&arm_spe_pmu_group,
	&arm_spe_pmu_cap_group,
	&arm_spe_pmu_format_group,
	NULL,
};

/* Convert between user ABI and register values */
static u64 arm_spe_event_to_pmscr(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	u64 reg = 0;

	reg |= ATTR_CFG_GET_FLD(attr, ts_enable) << SYS_PMSCR_EL1_TS_SHIFT;
	reg |= ATTR_CFG_GET_FLD(attr, pa_enable) << SYS_PMSCR_EL1_PA_SHIFT;
	reg |= ATTR_CFG_GET_FLD(attr, pct_enable) << SYS_PMSCR_EL1_PCT_SHIFT;

	if (!attr->exclude_user)
		reg |= BIT(SYS_PMSCR_EL1_E0SPE_SHIFT);

	if (!attr->exclude_kernel)
		reg |= BIT(SYS_PMSCR_EL1_E1SPE_SHIFT);

	if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable())
		reg |= BIT(SYS_PMSCR_EL1_CX_SHIFT);

	return reg;
}

static void arm_spe_event_sanitise_period(struct perf_event *event)
{
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
	u64 period = event->hw.sample_period;
	u64 max_period = SYS_PMSIRR_EL1_INTERVAL_MASK
			 << SYS_PMSIRR_EL1_INTERVAL_SHIFT;

	if (period < spe_pmu->min_period)
		period = spe_pmu->min_period;
	else if (period > max_period)
		period = max_period;
	else
		period &= max_period;

	event->hw.sample_period = period;
}

static u64 arm_spe_event_to_pmsirr(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	u64 reg = 0;

	arm_spe_event_sanitise_period(event);

	reg |= ATTR_CFG_GET_FLD(attr, jitter) << SYS_PMSIRR_EL1_RND_SHIFT;
	reg |= event->hw.sample_period;

	return reg;
}

static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	u64 reg = 0;

	reg |= ATTR_CFG_GET_FLD(attr, load_filter) << SYS_PMSFCR_EL1_LD_SHIFT;
	reg |= ATTR_CFG_GET_FLD(attr, store_filter) << SYS_PMSFCR_EL1_ST_SHIFT;
	reg |= ATTR_CFG_GET_FLD(attr, branch_filter) << SYS_PMSFCR_EL1_B_SHIFT;

	if (reg)
		reg |= BIT(SYS_PMSFCR_EL1_FT_SHIFT);

	if (ATTR_CFG_GET_FLD(attr, event_filter))
		reg |= BIT(SYS_PMSFCR_EL1_FE_SHIFT);

	if (ATTR_CFG_GET_FLD(attr, min_latency))
		reg |= BIT(SYS_PMSFCR_EL1_FL_SHIFT);

	return reg;
}

static u64 arm_spe_event_to_pmsevfr(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	return ATTR_CFG_GET_FLD(attr, event_filter);
}

static u64 arm_spe_event_to_pmslatfr(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	return ATTR_CFG_GET_FLD(attr, min_latency)
	       << SYS_PMSLATFR_EL1_MINLAT_SHIFT;
}

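/*
 * Fill the buffer from the current output offset with padding bytes
 * (0 is the SPE "PAD" packet, which decoders skip over) and, outside of
 * snapshot mode, consume the padded space from the AUX handle.
 */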
static void arm_spe_pmu_pad_buf(struct perf_output_handle *handle, int len)
{
	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
	u64 head = PERF_IDX2OFF(handle->head, buf);

	memset(buf->base + head, ARM_SPE_BUF_PAD_BYTE, len);
	if (!buf->snapshot)
		perf_aux_output_skip(handle, len);
}

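/*
 * Compute the buffer offset at which the hardware must stop writing (the
 * value we will program into PMBLIMITR_EL1) for a snapshot-mode session.
 */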
static u64 arm_spe_pmu_next_snapshot_off(struct perf_output_handle *handle)
{
	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
	u64 head = PERF_IDX2OFF(handle->head, buf);
	u64 limit = buf->nr_pages * PAGE_SIZE;

	/*
	 * The trace format isn't parseable in reverse, so clamp
	 * the limit to half of the buffer size in snapshot mode
	 * so that the worst case is half a buffer of records, as
	 * opposed to a single record.
	 */
	if (head < limit >> 1)
		limit >>= 1;

	/*
	 * If we're within max_record_sz of the limit, we must
	 * pad, move the head index and recompute the limit.
	 */
	if (limit - head < spe_pmu->max_record_sz) {
		arm_spe_pmu_pad_buf(handle, limit - head);
		handle->head = PERF_IDX2OFF(limit, buf);
		limit = ((buf->nr_pages * PAGE_SIZE) >> 1) + handle->head;
	}

	return limit;
}

static u64 __arm_spe_pmu_next_off(struct perf_output_handle *handle)
{
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
	const u64 bufsize = buf->nr_pages * PAGE_SIZE;
	u64 limit = bufsize;
	u64 head, tail, wakeup;

	/*
	 * The head can be misaligned for two reasons:
	 *
	 * 1. The hardware left PMBPTR pointing to the first byte after
	 *    a record when generating a buffer management event.
	 *
	 * 2. We used perf_aux_output_skip to consume handle->size bytes
	 *    and CIRC_SPACE was used to compute the size, which always
	 *    leaves one entry free.
	 *
	 * Deal with this by padding to the next alignment boundary and
	 * moving the head index. If we run out of buffer space, we'll
	 * reduce handle->size to zero and end up reporting truncation.
	 */
	head = PERF_IDX2OFF(handle->head, buf);
	if (!IS_ALIGNED(head, spe_pmu->align)) {
		unsigned long delta = roundup(head, spe_pmu->align) - head;

		delta = min(delta, handle->size);
		arm_spe_pmu_pad_buf(handle, delta);
		head = PERF_IDX2OFF(handle->head, buf);
	}

	/* If we've run out of free space, then nothing more to do */
	if (!handle->size)
		goto no_space;

	/* Compute the tail and wakeup indices now that we've aligned head */
	tail = PERF_IDX2OFF(handle->head + handle->size, buf);
	wakeup = PERF_IDX2OFF(handle->wakeup, buf);

	/*
	 * Avoid clobbering unconsumed data. We know we have space, so
	 * if we see head == tail we know that the buffer is empty. If
	 * head > tail, then there's nothing to clobber prior to
	 * wrapping.
	 */
	if (head < tail)
		limit = round_down(tail, PAGE_SIZE);

	/*
	 * Wakeup may be arbitrarily far into the future. If it's not in
	 * the current generation, either we'll wrap before hitting it,
	 * or it's in the past and has been handled already.
	 *
	 * If there's a wakeup before we wrap, arrange to be woken up by
	 * the page boundary following it. Keep the tail boundary if
	 * that's lower.
	 */
	if (handle->wakeup < (handle->head + handle->size) && head <= wakeup)
		limit = min(limit, round_up(wakeup, PAGE_SIZE));

	if (limit > head)
		return limit;

	arm_spe_pmu_pad_buf(handle, handle->size);
no_space:
	perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
	perf_aux_output_end(handle, 0);
	return 0;
}

static u64 arm_spe_pmu_next_off(struct perf_output_handle *handle)
{
	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
	u64 limit = __arm_spe_pmu_next_off(handle);
	u64 head = PERF_IDX2OFF(handle->head, buf);

	/*
	 * If the head has come too close to the end of the buffer,
	 * then pad to the end and recompute the limit.
	 */
	if (limit && (limit - head < spe_pmu->max_record_sz)) {
		arm_spe_pmu_pad_buf(handle, limit - head);
		limit = __arm_spe_pmu_next_off(handle);
	}

	return limit;
}

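/*
 * Start an AUX session and program the buffer registers: PMBPTR_EL1 holds
 * the (virtual) address at which the hardware writes the next record, and
 * PMBLIMITR_EL1 holds the limit address together with the enable bit.
 */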
static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle,
					  struct perf_event *event)
{
	u64 base, limit;
	struct arm_spe_pmu_buf *buf;

	/* Start a new aux session */
	buf = perf_aux_output_begin(handle, event);
	if (!buf) {
		event->hw.state |= PERF_HES_STOPPED;
		/*
		 * We still need to clear the limit pointer, since the
		 * profiler might only be disabled by virtue of a fault.
		 */
		limit = 0;
		goto out_write_limit;
	}

	limit = buf->snapshot ? arm_spe_pmu_next_snapshot_off(handle)
			      : arm_spe_pmu_next_off(handle);
	if (limit)
		limit |= BIT(SYS_PMBLIMITR_EL1_E_SHIFT);

	limit += (u64)buf->base;
	base = (u64)buf->base + PERF_IDX2OFF(handle->head, buf);
	write_sysreg_s(base, SYS_PMBPTR_EL1);

out_write_limit:
	write_sysreg_s(limit, SYS_PMBLIMITR_EL1);
}

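/*
 * Close the AUX session: work out how much data the hardware wrote since
 * the last head position (PMBPTR_EL1 points at the next free byte) and
 * report it to perf. In snapshot mode we also update the head ourselves.
 */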
static void arm_spe_perf_aux_output_end(struct perf_output_handle *handle)
{
	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
	u64 offset, size;

	offset = read_sysreg_s(SYS_PMBPTR_EL1) - (u64)buf->base;
	size = offset - PERF_IDX2OFF(handle->head, buf);

	if (buf->snapshot)
		handle->head = offset;

	perf_aux_output_end(handle, size);
}

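/* Stop sampling and drain any buffered trace data on the local CPU */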
static void arm_spe_pmu_disable_and_drain_local(void)
{
	/* Disable profiling at EL0 and EL1 */
	write_sysreg_s(0, SYS_PMSCR_EL1);
	isb();

	/* Drain any buffered data */
	psb_csync();
	dsb(nsh);

	/* Disable the profiling buffer */
	write_sysreg_s(0, SYS_PMBLIMITR_EL1);
	isb();
}

/* IRQ handling */
static enum arm_spe_pmu_buf_fault_action
arm_spe_pmu_buf_get_fault_act(struct perf_output_handle *handle)
{
	const char *err_str;
	u64 pmbsr;
	enum arm_spe_pmu_buf_fault_action ret;

	/*
	 * Ensure new profiling data is visible to the CPU and any external
	 * aborts have been resolved.
	 */
	psb_csync();
	dsb(nsh);

	/* Ensure hardware updates to PMBPTR_EL1 are visible */
	isb();

	/* Service required? */
	pmbsr = read_sysreg_s(SYS_PMBSR_EL1);
	if (!(pmbsr & BIT(SYS_PMBSR_EL1_S_SHIFT)))
		return SPE_PMU_BUF_FAULT_ACT_SPURIOUS;

	/*
	 * If we've lost data, disable profiling and also set the PARTIAL
	 * flag to indicate that the last record is corrupted.
	 */
	if (pmbsr & BIT(SYS_PMBSR_EL1_DL_SHIFT))
		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED |
					     PERF_AUX_FLAG_PARTIAL);

	/* Report collisions to userspace so that it can up the period */
	if (pmbsr & BIT(SYS_PMBSR_EL1_COLL_SHIFT))
		perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION);

	/* We only expect buffer management events */
	switch (pmbsr & (SYS_PMBSR_EL1_EC_MASK << SYS_PMBSR_EL1_EC_SHIFT)) {
	case SYS_PMBSR_EL1_EC_BUF:
		/* Handled below */
		break;
	case SYS_PMBSR_EL1_EC_FAULT_S1:
	case SYS_PMBSR_EL1_EC_FAULT_S2:
		err_str = "Unexpected buffer fault";
		goto out_err;
	default:
		err_str = "Unknown error code";
		goto out_err;
	}

	/* Buffer management event */
	switch (pmbsr &
		(SYS_PMBSR_EL1_BUF_BSC_MASK << SYS_PMBSR_EL1_BUF_BSC_SHIFT)) {
	case SYS_PMBSR_EL1_BUF_BSC_FULL:
		ret = SPE_PMU_BUF_FAULT_ACT_OK;
		goto out_stop;
	default:
		err_str = "Unknown buffer status code";
	}

out_err:
	pr_err_ratelimited("%s on CPU %d [PMBSR=0x%016llx, PMBPTR=0x%016llx, PMBLIMITR=0x%016llx]\n",
			   err_str, smp_processor_id(), pmbsr,
			   read_sysreg_s(SYS_PMBPTR_EL1),
			   read_sysreg_s(SYS_PMBLIMITR_EL1));
	ret = SPE_PMU_BUF_FAULT_ACT_FATAL;

out_stop:
	arm_spe_perf_aux_output_end(handle);
	return ret;
}

static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
{
	struct perf_output_handle *handle = dev;
	struct perf_event *event = handle->event;
	enum arm_spe_pmu_buf_fault_action act;

	if (!perf_get_aux(handle))
		return IRQ_NONE;

	act = arm_spe_pmu_buf_get_fault_act(handle);
	if (act == SPE_PMU_BUF_FAULT_ACT_SPURIOUS)
		return IRQ_NONE;

	/*
	 * Ensure perf callbacks have completed, which may disable the
	 * profiling buffer in response to a TRUNCATION flag.
	 */
	irq_work_run();

	switch (act) {
	case SPE_PMU_BUF_FAULT_ACT_FATAL:
		/*
		 * If a fatal exception occurred, then leaving the profiling
		 * buffer enabled is a recipe for disaster. Since fatal
		 * faults don't always imply truncation, make sure that
		 * the profiling buffer is disabled explicitly before
		 * clearing the syndrome register.
		 */
		arm_spe_pmu_disable_and_drain_local();
		break;
	case SPE_PMU_BUF_FAULT_ACT_OK:
		/*
		 * We handled the fault (the buffer was full), so resume
		 * profiling as long as we didn't detect truncation.
		 * PMBPTR might be misaligned, but we'll burn that bridge
		 * when we get to it.
		 */
		if (!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)) {
			arm_spe_perf_aux_output_begin(handle, event);
			isb();
		}
		break;
	case SPE_PMU_BUF_FAULT_ACT_SPURIOUS:
		/* We've seen you before, but GCC has the memory of a sieve. */
		break;
	}

	/* The buffer pointers are now sane, so resume profiling. */
	write_sysreg_s(0, SYS_PMBSR_EL1);
	return IRQ_HANDLED;
}

static u64 arm_spe_pmsevfr_res0(u16 pmsver)
{
	switch (pmsver) {
	case ID_AA64DFR0_PMSVER_8_2:
		return SYS_PMSEVFR_EL1_RES0_8_2;
	case ID_AA64DFR0_PMSVER_8_3:
	/* Return the RES0 mask for the highest version we support */
	default:
		return SYS_PMSEVFR_EL1_RES0_8_3;
	}
}

/* Perf callbacks */
static int arm_spe_pmu_event_init(struct perf_event *event)
{
	u64 reg;
	struct perf_event_attr *attr = &event->attr;
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);

	/* This is, of course, deeply driver-specific */
	if (attr->type != event->pmu->type)
		return -ENOENT;

	if (event->cpu >= 0 &&
	    !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
		return -ENOENT;

	if (arm_spe_event_to_pmsevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver))
		return -EOPNOTSUPP;

	if (attr->exclude_idle)
		return -EOPNOTSUPP;

	/*
	 * Feedback-directed frequency throttling doesn't work when we
	 * have a buffer of samples. We'd need to manually count the
	 * samples in the buffer when it fills up and adjust the event
	 * count to reflect that. Instead, just force the user to specify
	 * a sample period.
	 */
	if (attr->freq)
		return -EINVAL;

	reg = arm_spe_event_to_pmsfcr(event);
	if ((reg & BIT(SYS_PMSFCR_EL1_FE_SHIFT)) &&
	    !(spe_pmu->features & SPE_PMU_FEAT_FILT_EVT))
		return -EOPNOTSUPP;

	if ((reg & BIT(SYS_PMSFCR_EL1_FT_SHIFT)) &&
	    !(spe_pmu->features & SPE_PMU_FEAT_FILT_TYP))
		return -EOPNOTSUPP;

	if ((reg & BIT(SYS_PMSFCR_EL1_FL_SHIFT)) &&
	    !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT))
		return -EOPNOTSUPP;

	reg = arm_spe_event_to_pmscr(event);
	if (!perfmon_capable() &&
	    (reg & (BIT(SYS_PMSCR_EL1_PA_SHIFT) |
		    BIT(SYS_PMSCR_EL1_CX_SHIFT) |
		    BIT(SYS_PMSCR_EL1_PCT_SHIFT))))
		return -EACCES;

	return 0;
}

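/*
 * Program the filter, sampling interval and control registers from the
 * event configuration and enable profiling. On PERF_EF_RELOAD, the saved
 * interval counter is restored to PMSICR_EL1 so that sampling resumes
 * where it left off.
 */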
static void arm_spe_pmu_start(struct perf_event *event, int flags)
{
	u64 reg;
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	struct perf_output_handle *handle = this_cpu_ptr(spe_pmu->handle);

	hwc->state = 0;
	arm_spe_perf_aux_output_begin(handle, event);
	if (hwc->state)
		return;

	reg = arm_spe_event_to_pmsfcr(event);
	write_sysreg_s(reg, SYS_PMSFCR_EL1);

	reg = arm_spe_event_to_pmsevfr(event);
	write_sysreg_s(reg, SYS_PMSEVFR_EL1);

	reg = arm_spe_event_to_pmslatfr(event);
	write_sysreg_s(reg, SYS_PMSLATFR_EL1);

	if (flags & PERF_EF_RELOAD) {
		reg = arm_spe_event_to_pmsirr(event);
		write_sysreg_s(reg, SYS_PMSIRR_EL1);
		isb();
		reg = local64_read(&hwc->period_left);
		write_sysreg_s(reg, SYS_PMSICR_EL1);
	}

	reg = arm_spe_event_to_pmscr(event);
	isb();
	write_sysreg_s(reg, SYS_PMSCR_EL1);
}

static void arm_spe_pmu_stop(struct perf_event *event, int flags)
{
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	struct perf_output_handle *handle = this_cpu_ptr(spe_pmu->handle);

	/* If we're already stopped, then nothing to do */
	if (hwc->state & PERF_HES_STOPPED)
		return;

	/* Stop all trace generation */
	arm_spe_pmu_disable_and_drain_local();

	if (flags & PERF_EF_UPDATE) {
		/*
		 * If there's a fault pending then ensure we contain it
		 * to this buffer, since we might be on the context-switch
		 * path.
		 */
		if (perf_get_aux(handle)) {
			enum arm_spe_pmu_buf_fault_action act;

			act = arm_spe_pmu_buf_get_fault_act(handle);
			if (act == SPE_PMU_BUF_FAULT_ACT_SPURIOUS)
				arm_spe_perf_aux_output_end(handle);
			else
				write_sysreg_s(0, SYS_PMBSR_EL1);
		}

		/*
		 * This may also contain ECOUNT, but nobody else should
		 * be looking at period_left, since we forbid frequency
		 * based sampling.
		 */
		local64_set(&hwc->period_left, read_sysreg_s(SYS_PMSICR_EL1));
		hwc->state |= PERF_HES_UPTODATE;
	}

	hwc->state |= PERF_HES_STOPPED;
}

static int arm_spe_pmu_add(struct perf_event *event, int flags)
{
	int ret = 0;
	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int cpu = event->cpu == -1 ? smp_processor_id() : event->cpu;

	if (!cpumask_test_cpu(cpu, &spe_pmu->supported_cpus))
		return -ENOENT;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (flags & PERF_EF_START) {
		arm_spe_pmu_start(event, PERF_EF_RELOAD);
		if (hwc->state & PERF_HES_STOPPED)
			ret = -EINVAL;
	}

	return ret;
}

static void arm_spe_pmu_del(struct perf_event *event, int flags)
{
	arm_spe_pmu_stop(event, PERF_EF_UPDATE);
}

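/*
 * Nothing to do here: there is no counter value to read back, since all
 * of the profiling data lands in the AUX buffer.
 */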
static void arm_spe_pmu_read(struct perf_event *event)
{
}

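/*
 * Build a contiguous kernel mapping (vmap) of the AUX pages allocated by
 * perf: the profiler writes through PMBPTR_EL1 using virtual addresses,
 * so it needs the buffer to be virtually contiguous.
 */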
static void *arm_spe_pmu_setup_aux(struct perf_event *event, void **pages,
				   int nr_pages, bool snapshot)
{
	int i, cpu = event->cpu;
	struct page **pglist;
	struct arm_spe_pmu_buf *buf;

	/* We need at least two pages for this to work. */
	if (nr_pages < 2)
		return NULL;

	/*
	 * We require an even number of pages for snapshot mode, so that
	 * we can effectively treat the buffer as consisting of two equal
	 * parts and give userspace a fighting chance of getting some
	 * useful data out of it.
	 */
	if (snapshot && (nr_pages & 1))
		return NULL;

	if (cpu == -1)
		cpu = raw_smp_processor_id();

	buf = kzalloc_node(sizeof(*buf), GFP_KERNEL, cpu_to_node(cpu));
	if (!buf)
		return NULL;

	pglist = kcalloc(nr_pages, sizeof(*pglist), GFP_KERNEL);
	if (!pglist)
		goto out_free_buf;

	for (i = 0; i < nr_pages; ++i)
		pglist[i] = virt_to_page(pages[i]);

	buf->base = vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!buf->base)
		goto out_free_pglist;

	buf->nr_pages	= nr_pages;
	buf->snapshot	= snapshot;

	kfree(pglist);
	return buf;

out_free_pglist:
	kfree(pglist);
out_free_buf:
	kfree(buf);
	return NULL;
}

static void arm_spe_pmu_free_aux(void *aux)
{
	struct arm_spe_pmu_buf *buf = aux;

	vunmap(buf->base);
	kfree(buf);
}

/* Initialisation and teardown functions */
static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu)
{
	static atomic_t pmu_idx = ATOMIC_INIT(-1);

	int idx;
	char *name;
	struct device *dev = &spe_pmu->pdev->dev;

	spe_pmu->pmu = (struct pmu) {
		.module = THIS_MODULE,
		.capabilities	= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
		.attr_groups	= arm_spe_pmu_attr_groups,
		/*
		 * We hitch a ride on the software context here, so that
		 * we can support per-task profiling (which is not possible
		 * with the invalid context as it doesn't get sched callbacks).
		 * This requires that userspace either uses a dummy event for
		 * perf_event_open, since the aux buffer is not setup until
		 * a subsequent mmap, or creates the profiling event in a
		 * disabled state and explicitly PERF_EVENT_IOC_ENABLEs it
		 * once the buffer has been created.
		 */
		.task_ctx_nr	= perf_sw_context,
		.event_init	= arm_spe_pmu_event_init,
		.add		= arm_spe_pmu_add,
		.del		= arm_spe_pmu_del,
		.start		= arm_spe_pmu_start,
		.stop		= arm_spe_pmu_stop,
		.read		= arm_spe_pmu_read,
		.setup_aux	= arm_spe_pmu_setup_aux,
		.free_aux	= arm_spe_pmu_free_aux,
	};

	idx = atomic_inc_return(&pmu_idx);
	name = devm_kasprintf(dev, GFP_KERNEL, "%s_%d", PMUNAME, idx);
	if (!name) {
		dev_err(dev, "failed to allocate name for pmu %d\n", idx);
		return -ENOMEM;
	}

	return perf_pmu_register(&spe_pmu->pmu, name, -1);
}

static void arm_spe_pmu_perf_destroy(struct arm_spe_pmu *spe_pmu)
{
	perf_pmu_unregister(&spe_pmu->pmu);
}

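/*
 * Runs via smp_call_function_any() on a CPU that implements SPE: reads
 * the ID registers to discover the supported feature set, minimum
 * sampling interval, buffer alignment and maximum record size.
 */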
static void __arm_spe_pmu_dev_probe(void *info)
{
	int fld;
	u64 reg;
	struct arm_spe_pmu *spe_pmu = info;
	struct device *dev = &spe_pmu->pdev->dev;

	fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64DFR0_EL1),
						   ID_AA64DFR0_PMSVER_SHIFT);
	if (!fld) {
		dev_err(dev,
			"unsupported ID_AA64DFR0_EL1.PMSVer [%d] on CPU %d\n",
			fld, smp_processor_id());
		return;
	}
	spe_pmu->pmsver = (u16)fld;

	/* Read PMBIDR first to determine whether or not we have access */
	reg = read_sysreg_s(SYS_PMBIDR_EL1);
	if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) {
		dev_err(dev,
			"profiling buffer owned by higher exception level\n");
		return;
	}

	/* Minimum alignment. If it's out-of-range, then fail the probe */
	fld = reg >> SYS_PMBIDR_EL1_ALIGN_SHIFT & SYS_PMBIDR_EL1_ALIGN_MASK;
	spe_pmu->align = 1 << fld;
	if (spe_pmu->align > SZ_2K) {
		dev_err(dev, "unsupported PMBIDR.Align [%d] on CPU %d\n",
			fld, smp_processor_id());
		return;
	}

	/* It's now safe to read PMSIDR and figure out what we've got */
	reg = read_sysreg_s(SYS_PMSIDR_EL1);
	if (reg & BIT(SYS_PMSIDR_EL1_FE_SHIFT))
		spe_pmu->features |= SPE_PMU_FEAT_FILT_EVT;

	if (reg & BIT(SYS_PMSIDR_EL1_FT_SHIFT))
		spe_pmu->features |= SPE_PMU_FEAT_FILT_TYP;

	if (reg & BIT(SYS_PMSIDR_EL1_FL_SHIFT))
		spe_pmu->features |= SPE_PMU_FEAT_FILT_LAT;

	if (reg & BIT(SYS_PMSIDR_EL1_ARCHINST_SHIFT))
		spe_pmu->features |= SPE_PMU_FEAT_ARCH_INST;

	if (reg & BIT(SYS_PMSIDR_EL1_LDS_SHIFT))
		spe_pmu->features |= SPE_PMU_FEAT_LDS;

	if (reg & BIT(SYS_PMSIDR_EL1_ERND_SHIFT))
		spe_pmu->features |= SPE_PMU_FEAT_ERND;

	/* This field has a spaced out encoding, so just use a look-up */
	fld = reg >> SYS_PMSIDR_EL1_INTERVAL_SHIFT & SYS_PMSIDR_EL1_INTERVAL_MASK;
	switch (fld) {
	case 0:
		spe_pmu->min_period = 256;
		break;
	case 2:
		spe_pmu->min_period = 512;
		break;
	case 3:
		spe_pmu->min_period = 768;
		break;
	case 4:
		spe_pmu->min_period = 1024;
		break;
	case 5:
		spe_pmu->min_period = 1536;
		break;
	case 6:
		spe_pmu->min_period = 2048;
		break;
	case 7:
		spe_pmu->min_period = 3072;
		break;
	default:
		dev_warn(dev, "unknown PMSIDR_EL1.Interval [%d]; assuming 8\n",
			 fld);
		fallthrough;
	case 8:
		spe_pmu->min_period = 4096;
	}

	/* Maximum record size. If it's out-of-range, then fail the probe */
	fld = reg >> SYS_PMSIDR_EL1_MAXSIZE_SHIFT & SYS_PMSIDR_EL1_MAXSIZE_MASK;
	spe_pmu->max_record_sz = 1 << fld;
	if (spe_pmu->max_record_sz > SZ_2K || spe_pmu->max_record_sz < 16) {
		dev_err(dev, "unsupported PMSIDR_EL1.MaxSize [%d] on CPU %d\n",
			fld, smp_processor_id());
		return;
	}

	fld = reg >> SYS_PMSIDR_EL1_COUNTSIZE_SHIFT & SYS_PMSIDR_EL1_COUNTSIZE_MASK;
	switch (fld) {
	default:
		dev_warn(dev, "unknown PMSIDR_EL1.CountSize [%d]; assuming 2\n",
			 fld);
		fallthrough;
	case 2:
		spe_pmu->counter_sz = 12;
	}

	dev_info(dev,
		 "probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
		 cpumask_pr_args(&spe_pmu->supported_cpus),
		 spe_pmu->max_record_sz, spe_pmu->align, spe_pmu->features);

	spe_pmu->features |= SPE_PMU_FEAT_DEV_PROBED;
	return;
}

static void __arm_spe_pmu_reset_local(void)
{
	/*
	 * This is probably overkill, as we have no idea where we're
	 * draining any buffered data to...
	 */
	arm_spe_pmu_disable_and_drain_local();

	/* Reset the buffer base pointer */
	write_sysreg_s(0, SYS_PMBPTR_EL1);
	isb();

	/* Clear any pending management interrupts */
	write_sysreg_s(0, SYS_PMBSR_EL1);
	isb();
}

static void __arm_spe_pmu_setup_one(void *info)
{
	struct arm_spe_pmu *spe_pmu = info;

	__arm_spe_pmu_reset_local();
	enable_percpu_irq(spe_pmu->irq, IRQ_TYPE_NONE);
}

static void __arm_spe_pmu_stop_one(void *info)
{
	struct arm_spe_pmu *spe_pmu = info;

	disable_percpu_irq(spe_pmu->irq);
	__arm_spe_pmu_reset_local();
}

static int arm_spe_pmu_cpu_startup(unsigned int cpu, struct hlist_node *node)
{
	struct arm_spe_pmu *spe_pmu;

	spe_pmu = hlist_entry_safe(node, struct arm_spe_pmu, hotplug_node);
	if (!cpumask_test_cpu(cpu, &spe_pmu->supported_cpus))
		return 0;

	__arm_spe_pmu_setup_one(spe_pmu);
	return 0;
}

static int arm_spe_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
	struct arm_spe_pmu *spe_pmu;

	spe_pmu = hlist_entry_safe(node, struct arm_spe_pmu, hotplug_node);
	if (!cpumask_test_cpu(cpu, &spe_pmu->supported_cpus))
		return 0;

	__arm_spe_pmu_stop_one(spe_pmu);
	return 0;
}

static int arm_spe_pmu_dev_init(struct arm_spe_pmu *spe_pmu)
{
	int ret;
	cpumask_t *mask = &spe_pmu->supported_cpus;

	/* Make sure we probe the hardware on a relevant CPU */
	ret = smp_call_function_any(mask, __arm_spe_pmu_dev_probe, spe_pmu, 1);
	if (ret || !(spe_pmu->features & SPE_PMU_FEAT_DEV_PROBED))
		return -ENXIO;

	/* Request our PPIs (note that the IRQ is still disabled) */
	ret = request_percpu_irq(spe_pmu->irq, arm_spe_pmu_irq_handler, DRVNAME,
				 spe_pmu->handle);
	if (ret)
		return ret;

	/*
	 * Register our hotplug notifier now so we don't miss any events.
	 * This will enable the IRQ for any supported CPUs that are already
	 * up.
	 */
	ret = cpuhp_state_add_instance(arm_spe_pmu_online,
				       &spe_pmu->hotplug_node);
	if (ret)
		free_percpu_irq(spe_pmu->irq, spe_pmu->handle);

	return ret;
}

static void arm_spe_pmu_dev_teardown(struct arm_spe_pmu *spe_pmu)
{
	cpuhp_state_remove_instance(arm_spe_pmu_online, &spe_pmu->hotplug_node);
	free_percpu_irq(spe_pmu->irq, spe_pmu->handle);
}

/* Driver and device probing */
static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu)
{
	struct platform_device *pdev = spe_pmu->pdev;
	int irq = platform_get_irq(pdev, 0);

	if (irq < 0)
		return -ENXIO;

	if (!irq_is_percpu(irq)) {
		dev_err(&pdev->dev, "expected PPI but got SPI (%d)\n", irq);
		return -EINVAL;
	}

	if (irq_get_percpu_devid_partition(irq, &spe_pmu->supported_cpus)) {
		dev_err(&pdev->dev, "failed to get PPI partition (%d)\n", irq);
		return -EINVAL;
	}

	spe_pmu->irq = irq;
	return 0;
}

static const struct of_device_id arm_spe_pmu_of_match[] = {
	{ .compatible = "arm,statistical-profiling-extension-v1", .data = (void *)1 },
	{ /* Sentinel */ },
};
MODULE_DEVICE_TABLE(of, arm_spe_pmu_of_match);

static const struct platform_device_id arm_spe_match[] = {
	{ ARMV8_SPE_PDEV_NAME, 0},
	{ }
};
MODULE_DEVICE_TABLE(platform, arm_spe_match);

static int arm_spe_pmu_device_probe(struct platform_device *pdev)
{
	int ret;
	struct arm_spe_pmu *spe_pmu;
	struct device *dev = &pdev->dev;

	/*
	 * If kernelspace is unmapped when running at EL0, then the SPE
	 * buffer will fault and prematurely terminate the AUX session.
	 */
	if (arm64_kernel_unmapped_at_el0()) {
		dev_warn_once(dev, "profiling buffer inaccessible. Try passing \"kpti=off\" on the kernel command line\n");
		return -EPERM;
	}

	spe_pmu = devm_kzalloc(dev, sizeof(*spe_pmu), GFP_KERNEL);
	if (!spe_pmu) {
		dev_err(dev, "failed to allocate spe_pmu\n");
		return -ENOMEM;
	}

	spe_pmu->handle = alloc_percpu(typeof(*spe_pmu->handle));
	if (!spe_pmu->handle)
		return -ENOMEM;

	spe_pmu->pdev = pdev;
	platform_set_drvdata(pdev, spe_pmu);

	ret = arm_spe_pmu_irq_probe(spe_pmu);
	if (ret)
		goto out_free_handle;

	ret = arm_spe_pmu_dev_init(spe_pmu);
	if (ret)
		goto out_free_handle;

	ret = arm_spe_pmu_perf_init(spe_pmu);
	if (ret)
		goto out_teardown_dev;

	return 0;

out_teardown_dev:
	arm_spe_pmu_dev_teardown(spe_pmu);
out_free_handle:
	free_percpu(spe_pmu->handle);
	return ret;
}

static int arm_spe_pmu_device_remove(struct platform_device *pdev)
{
	struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);

	arm_spe_pmu_perf_destroy(spe_pmu);
	arm_spe_pmu_dev_teardown(spe_pmu);
	free_percpu(spe_pmu->handle);
	return 0;
}

static struct platform_driver arm_spe_pmu_driver = {
	.id_table = arm_spe_match,
	.driver	= {
		.name		= DRVNAME,
		.of_match_table	= of_match_ptr(arm_spe_pmu_of_match),
		.suppress_bind_attrs = true,
	},
	.probe	= arm_spe_pmu_device_probe,
	.remove	= arm_spe_pmu_device_remove,
};

static int __init arm_spe_pmu_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRVNAME,
				      arm_spe_pmu_cpu_startup,
				      arm_spe_pmu_cpu_teardown);
	if (ret < 0)
		return ret;
	arm_spe_pmu_online = ret;

	ret = platform_driver_register(&arm_spe_pmu_driver);
	if (ret)
		cpuhp_remove_multi_state(arm_spe_pmu_online);

	return ret;
}

static void __exit arm_spe_pmu_exit(void)
{
	platform_driver_unregister(&arm_spe_pmu_driver);
	cpuhp_remove_multi_state(arm_spe_pmu_online);
}

module_init(arm_spe_pmu_init);
module_exit(arm_spe_pmu_exit);

MODULE_DESCRIPTION("Perf driver for the ARMv8.2 Statistical Profiling Extension");
MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
MODULE_LICENSE("GPL v2");