/*
 * Performance event support for the System z CPU-measurement Sampling Facility
 *
 * Copyright IBM Corp. 2013
 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 */
#define KMSG_COMPONENT "cpum_sf"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>
#include <asm/cpu_mf.h>
#include <asm/irq.h>
#include <asm/debug.h>
#include <asm/timex.h>

/* Minimum number of sample-data-block-tables:
 * At least one table is required for the sampling buffer structure.
 * A single table contains up to 511 pointers to sample-data-blocks.
 */
#define CPUM_SF_MIN_SDBT	1

/* Number of sample-data-blocks per sample-data-block-table (SDBT):
 * A table contains SDB pointers (8 bytes) and one table-link entry
 * that points to the origin of the next SDBT.
 */
#define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)

/* Maximum page offset for an SDBT table-link entry:
 * If this page offset is reached, a table-link entry to the next SDBT
 * must be added.
 */
#define CPUM_SF_SDBT_TL_OFFSET	(CPUM_SF_SDB_PER_TABLE * 8)
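/* Example, assuming 4KB pages: CPUM_SF_SDB_PER_TABLE is (4096 - 8) / 8 = 511
 * SDB pointers, so the table-link entry occupies the last eight bytes of the
 * page at offset 511 * 8 = 4088, which is CPUM_SF_SDBT_TL_OFFSET above.
 */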
static inline int require_table_link(const void *sdbt)
{
        return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
}

/* Minimum and maximum sampling buffer sizes:
 *
 * These numbers represent the minimum and maximum size of the sampling
 * buffer, taking the number of sample-data-block-tables into account.
 * Note that these numbers apply to the basic-sampling function only.
 * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if
 * the diagnostic-sampling function is active.
 *
 * Sampling buffer size		Buffer characteristics
 * ---------------------------------------------------
 *	 64KB		==	  16 pages (4KB per page)
 *				   1 page  for SDB-tables
 *				  15 pages for SDBs
 *
 *	 32MB		==	8192 pages (4KB per page)
 *				  16 pages for SDB-tables
 *				8176 pages for SDBs
 */
static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1;

struct sf_buffer {
        unsigned long	 *sdbt;     /* Sample-data-block-table origin */
        /* buffer characteristics (required for buffer increments) */
        unsigned long  num_sdb;     /* Number of sample-data-blocks */
        unsigned long num_sdbt;     /* Number of sample-data-block-tables */
        unsigned long	 *tail;     /* last sample-data-block-table */
};
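/* Sketch of the resulting buffer layout (see the allocation helpers below):
 *
 *   sdbt -> [SDB ptr][SDB ptr] ... [SDB ptr][table-link] -> next SDBT
 *                ...
 *   last SDBT:  [SDB ptr] ... [table-link] -> back to the sdbt origin
 *
 * Each SDBT occupies one 4KB page; its last slot holds a table-link entry,
 * and the final table-link points back to the origin, closing the ring.
 */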

struct cpu_hw_sf {
        /* CPU-measurement sampling information block */
        struct hws_qsi_info_block qsi;
        /* CPU-measurement sampling control block */
        struct hws_lsctl_request_block lsctl;
        struct sf_buffer sfb;	    /* Sampling buffer */
        unsigned int flags;	    /* Status flags */
        struct perf_event *event;   /* Scheduled perf event */
};
static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);

/* Debug feature */
static debug_info_t *sfdbg;

/*
 * sf_disable() - Switch off sampling facility
 */
static int sf_disable(void)
{
        struct hws_lsctl_request_block sreq;

        memset(&sreq, 0, sizeof(sreq));
        return lsctl(&sreq);
}
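/* Note: loading an all-zero request block clears the enable and activation
 * controls of both the basic- and the (optional) diagnostic-sampling
 * function, which switches the facility off.
 */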

/*
 * sf_buffer_available() - Check for an allocated sampling buffer
 */
static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
{
        return !!cpuhw->sfb.sdbt;
}

/*
 * deallocate sampling facility buffer
 */
static void free_sampling_buffer(struct sf_buffer *sfb)
{
        unsigned long *sdbt, *curr;

        if (!sfb->sdbt)
                return;

        sdbt = sfb->sdbt;
        curr = sdbt;

        /* Free the SDBT after all SDBs are processed... */
        while (1) {
                if (!*curr || !sdbt)
                        break;

                /* Process table-link entries */
                if (is_link_entry(curr)) {
                        curr = get_next_sdbt(curr);
                        if (sdbt)
                                free_page((unsigned long) sdbt);

                        /* If the origin is reached, sampling buffer is freed */
                        if (curr == sfb->sdbt)
                                break;
                        else
                                sdbt = curr;
                } else {
                        /* Process SDB pointer */
                        if (*curr) {
                                free_page(*curr);
                                curr++;
                        }
                }
        }

        debug_sprintf_event(sfdbg, 5,
                            "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
        memset(sfb, 0, sizeof(*sfb));
}
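/* Note: a table-link entry is tagged by setting the low-order bit of the
 * linked SDBT's address (see the "+ 1" in realloc_sampling_buffer() and
 * alloc_sampling_buffer() below); is_link_entry() and get_next_sdbt() test
 * and mask that bit, respectively.
 */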
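/* alloc_sample_data_block() - Allocate and initialize one sample-data-block
 *
 * The trailer's alert-request control (SDB_TE_ALERT_REQ_MASK) is set so that
 * the facility raises a measurement alert once the block is full, allowing
 * the measurement-alert handler to harvest the stored samples.
 */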
static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
{
        unsigned long sdb, *trailer;

        /* Allocate and initialize sample-data-block */
        sdb = get_zeroed_page(gfp_flags);
        if (!sdb)
                return -ENOMEM;
        trailer = trailer_entry_ptr(sdb);
        *trailer = SDB_TE_ALERT_REQ_MASK;

        /* Link SDB into the sample-data-block-table */
        *sdbt = sdb;

        return 0;
}

/*
 * realloc_sampling_buffer() - extend sampler memory
 *
 * Allocates new sample-data-blocks and adds them to the specified sampling
 * buffer memory.
 *
 * Important: This modifies the sampling buffer and must be called when the
 *	      sampling facility is disabled.
 *
 * Returns zero on success, non-zero otherwise.
 */
static int realloc_sampling_buffer(struct sf_buffer *sfb,
                                   unsigned long num_sdb, gfp_t gfp_flags)
{
        int i, rc;
        unsigned long *new, *tail;

        if (!sfb->sdbt || !sfb->tail)
                return -EINVAL;

        if (!is_link_entry(sfb->tail))
                return -EINVAL;

        /* Append to the existing sampling buffer, overwriting the table-link
         * entry.
         * The tail variable always points to the "tail" (last and table-link)
         * entry in an SDB-table.
         */
        tail = sfb->tail;

        /* Do a sanity check whether the table-link entry points to
         * the sampling buffer origin.
         */
        if (sfb->sdbt != get_next_sdbt(tail)) {
                debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
                                    "sampling buffer is not linked: origin=%p "
                                    "tail=%p\n",
                                    (void *) sfb->sdbt, (void *) tail);
                return -EINVAL;
        }

        /* Allocate remaining SDBs */
        rc = 0;
        for (i = 0; i < num_sdb; i++) {
                /* Allocate a new SDB-table if it is full. */
                if (require_table_link(tail)) {
                        new = (unsigned long *) get_zeroed_page(gfp_flags);
                        if (!new) {
                                rc = -ENOMEM;
                                break;
                        }
                        sfb->num_sdbt++;
                        /* Link current page to tail of chain */
                        *tail = (unsigned long)(void *) new + 1;
                        tail = new;
                }

                /* Allocate a new sample-data-block.
                 * If there is not enough memory, stop the realloc process
                 * and simply use what was allocated.  If this is a temporary
                 * issue, a new realloc call (if required) might succeed.
                 */
                rc = alloc_sample_data_block(tail, gfp_flags);
                if (rc)
                        break;
                sfb->num_sdb++;
                tail++;
        }

        /* Link sampling buffer to its origin */
        *tail = (unsigned long) sfb->sdbt + 1;
        sfb->tail = tail;

        debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
                            " settings: sdbt=%lu sdb=%lu\n",
                            sfb->num_sdbt, sfb->num_sdb);
        return rc;
}

/*
 * alloc_sampling_buffer() - allocate sampler memory
 *
 * Allocates and initializes a sampling buffer structure using the
 * specified number of sample-data-blocks (SDB).  For each allocation,
 * a 4K page is used.  The number of sample-data-block-tables (SDBT)
 * is calculated from the number of SDBs.
 * Also set the ALERT_REQ mask in each SDB's trailer.
 *
 * Returns zero on success, non-zero otherwise.
 */
static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
{
        int rc;

        if (sfb->sdbt)
                return -EINVAL;

        /* Allocate the sample-data-block-table origin */
        sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
        if (!sfb->sdbt)
                return -ENOMEM;
        sfb->num_sdb = 0;
        sfb->num_sdbt = 1;

        /* Link the table origin to point to itself to prepare for
         * realloc_sampling_buffer() invocation.
         */
        sfb->tail = sfb->sdbt;
        *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;

        /* Allocate requested number of sample-data-blocks */
        rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
        if (rc) {
                free_sampling_buffer(sfb);
                debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
                        "realloc_sampling_buffer failed with rc=%i\n", rc);
        } else
                debug_sprintf_event(sfdbg, 4,
                        "alloc_sampling_buffer: tear=%p dear=%p\n",
                        sfb->sdbt, (void *) *sfb->sdbt);
        return rc;
}

static void sfb_set_limits(unsigned long min, unsigned long max)
{
        struct hws_qsi_info_block si;

        CPUM_SF_MIN_SDB = min;
        CPUM_SF_MAX_SDB = max;

        memset(&si, 0, sizeof(si));
        if (!qsi(&si))
                CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
}

static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
{
        return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR
                                    : CPUM_SF_MAX_SDB;
}
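/* Example with assumed, machine-dependent entry sizes: for bsdes = 32 and
 * dsdes = 96, sfb_set_limits() computes CPUM_SF_SDB_DIAG_FACTOR =
 * DIV_ROUND_UP(96, 32) = 3, so sfb_max_limit() permits three times as many
 * SDBs when the diagnostic-sampling function is active.
 */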

static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
                                        struct hw_perf_event *hwc)
{
        if (!sfb->sdbt)
                return SFB_ALLOC_REG(hwc);
        if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
                return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
        return 0;
}

static int sfb_has_pending_allocs(struct sf_buffer *sfb,
                                  struct hw_perf_event *hwc)
{
        return sfb_pending_allocs(sfb, hwc) > 0;
}

static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
{
        /* Limit the number of SDBs to not exceed the maximum */
        num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc));
        if (num)
                SFB_ALLOC_REG(hwc) += num;
}

static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
{
        SFB_ALLOC_REG(hwc) = 0;
        sfb_account_allocs(num, hwc);
}
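/* Note: SFB_ALLOC_REG() holds the target number of SDBs for an event;
 * sfb_pending_allocs() above reports how many of them are still missing
 * from the buffer that is actually allocated (sfb->num_sdb).
 */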

static size_t event_sample_size(struct hw_perf_event *hwc)
{
        struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
        size_t sample_size;

        /* The sample size depends on the sampling function: The basic-sampling
         * function must always be enabled, the diagnostic-sampling function is
         * optional.
         */
        sample_size = sfr->bsdes;
        if (SAMPL_DIAG_MODE(hwc))
                sample_size += sfr->dsdes;

        return sample_size;
}

static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
{
        if (cpuhw->sfb.sdbt)
                free_sampling_buffer(&cpuhw->sfb);
}
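/* Worked example for the SDB calculation in allocate_buffers() below, with
 * assumed machine-dependent values: at a sampling frequency of 10000 samples
 * per second and a sample size of 32 bytes, one 4KB SDB holds
 * (4096 - 64) / 32 = 126 samples, so buffering roughly one second of data
 * requires n_sdb = DIV_ROUND_UP(10000, 126) = 80 sample-data-blocks.
 */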
static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
{
        unsigned long n_sdb, freq, factor;
        size_t sfr_size, sample_size;
        struct sf_raw_sample *sfr;

        /* Allocate raw sample buffer
         *
         *    The raw sample buffer is used to temporarily store sampling data
         *    entries for perf raw sample processing.  The buffer size mainly
         *    depends on the size of diagnostic-sampling data entries which is
         *    machine-specific.  The exact size calculation includes:
         *	1. The first 4 bytes of diagnostic-sampling data entries are
         *	   already reflected in the sf_raw_sample structure.  Subtract
         *	   these bytes.
         *	2. The perf raw sample data must be 8-byte aligned (u64) and
         *	   perf's internal data size must be considered too.  So add
         *	   an additional u32 for correct alignment and subtract before
         *	   allocating the buffer.
         *	3. Store the raw sample buffer pointer in the perf event
         *	   hardware structure.
         */
        sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) +
                         sizeof(u32), sizeof(u64));
        sfr_size -= sizeof(u32);
        sfr = kzalloc(sfr_size, GFP_KERNEL);
        if (!sfr)
                return -ENOMEM;
        sfr->size = sfr_size;
        sfr->bsdes = cpuhw->qsi.bsdes;
        sfr->dsdes = cpuhw->qsi.dsdes;
        RAWSAMPLE_REG(hwc) = (unsigned long) sfr;

        /* Calculate sampling buffers using 4K pages
         *
         *    1. Determine the sample data size which depends on the used
         *	 sampling functions, for example, basic-sampling or
         *	 basic-sampling with diagnostic-sampling.
         *
         *    2. Use the sampling frequency as input.  The sampling buffer is
         *	 designed for almost one second.  This can be adjusted through
         *	 the "factor" variable.
         *	 In any case, alloc_sampling_buffer() sets the Alert Request
         *	 Control indicator to trigger a measurement-alert to harvest
         *	 sample-data-blocks (sdb).
         *
         *    3. Compute the number of sample-data-blocks and ensure a minimum
         *	 of CPUM_SF_MIN_SDB.  Also ensure the upper limit does not
         *	 exceed a "calculated" maximum.  The symbolic maximum is
         *	 designed for basic-sampling only and needs to be increased if
         *	 diagnostic-sampling is active.
         *	 See also the remarks for these symbolic constants.
         *
         *    4. Compute the number of sample-data-block-tables (SDBT) and
         *	 ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
         *	 to 511 SDBs).
         */
        sample_size = event_sample_size(hwc);
        freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
        factor = 1;
        n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
        if (n_sdb < CPUM_SF_MIN_SDB)
                n_sdb = CPUM_SF_MIN_SDB;

        /* If there is already a sampling buffer allocated, it is very likely
         * that the sampling facility is enabled too.  If the event to be
         * initialized requires a greater sampling buffer, the allocation must
         * be postponed.  Changing the sampling buffer requires the sampling
         * facility to be in the disabled state.  So, account the number of
         * required SDBs and let cpumsf_pmu_enable() resize the buffer just
         * before the event is started.
         */
        sfb_init_allocs(n_sdb, hwc);
        if (sf_buffer_available(cpuhw))
                return 0;

        debug_sprintf_event(sfdbg, 3,
                            "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
                            " sample_size=%lu cpuhw=%p\n",
                            SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
                            sample_size, cpuhw);

        return alloc_sampling_buffer(&cpuhw->sfb,
                                     sfb_pending_allocs(&cpuhw->sfb, hwc));
}

static unsigned long min_percent(unsigned int percent, unsigned long base,
                                 unsigned long min)
{
        return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
}
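/* Note: despite the name of its third parameter, min_percent() returns the
 * smaller of @min and @percent percent of @base (rounded up), so @min
 * effectively caps the extents computed by compute_sfb_extent() below.
 */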

static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
{
        /* Use a percentage-based approach to extend the sampling facility
         * buffer.  Accept up to 5% sample data loss.
         * Vary the extents between 1% to 5% of the current number of
         * sample-data-blocks.
         */
        if (ratio <= 5)
                return 0;
        if (ratio <= 25)
                return min_percent(1, base, 1);
        if (ratio <= 50)
                return min_percent(1, base, 1);
        if (ratio <= 75)
                return min_percent(2, base, 2);
        if (ratio <= 100)
                return min_percent(3, base, 3);
        if (ratio <= 250)
                return min_percent(4, base, 4);

        return min_percent(5, base, 8);
}
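/* Worked example for the ratio computed below: if OVERFLOW_REG() reports an
 * average of 2 lost samples per SDB, the buffer holds 100 SDBs, and the
 * sampling rate maps to 10000 samples per second, then
 * ratio = DIV_ROUND_UP(100 * 2 * 100, 10000) = 2 percent.  That is within
 * the accepted 5% loss, so compute_sfb_extent() returns 0 and no buffer
 * extension is accounted.
 */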
static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
                                  struct hw_perf_event *hwc)
{
        unsigned long ratio, num;

        if (!OVERFLOW_REG(hwc))
                return;

        /* The sample_overflow contains the average number of sample data
         * that has been lost because sample-data-blocks were full.
         *
         * Calculate the total number of sample data entries that have been
         * discarded.  Then calculate the ratio of lost samples to total
         * samples per second in percent.
         */
        ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
                             sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));

        /* Compute number of sample-data-blocks */
        num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
        if (num)
                sfb_account_allocs(num, hwc);

        debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
                            " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
        OVERFLOW_REG(hwc) = 0;
}

/* extend_sampling_buffer() - Extend sampling buffer
 * @sfb:	Sampling buffer structure (for local CPU)
 * @hwc:	Perf event hardware structure
 *
 * Use this function to extend the sampling buffer based on the overflow counter
 * and postponed allocation extents stored in the specified Perf event hardware.
 *
 * Important: This function disables the sampling facility in order to safely
 *	      change the sampling buffer structure.  Do not call this function
 *	      when the PMU is active.
 */
static void extend_sampling_buffer(struct sf_buffer *sfb,
                                   struct hw_perf_event *hwc)
{
        unsigned long num, num_old;
        int rc;

        num = sfb_pending_allocs(sfb, hwc);
        if (!num)
                return;
        num_old = sfb->num_sdb;

        /* Disable the sampling facility to reset any states and also
         * clear pending measurement alerts.
         */
        sf_disable();

        /* Extend the sampling buffer.
         * This memory allocation typically happens in an atomic context when
         * called by perf.  Because this is a reallocation, it is fine if the
         * new SDB-request cannot be satisfied immediately.
         */
        rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
        if (rc)
                debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
                                    "failed with rc=%i\n", rc);

        if (sfb_has_pending_allocs(sfb, hwc))
                debug_sprintf_event(sfdbg, 5, "sfb: extend: "
                                    "req=%lu alloc=%lu remaining=%lu\n",
                                    num, sfb->num_sdb - num_old,
                                    sfb_pending_allocs(sfb, hwc));
}


/* Number of perf events counting hardware events */
static atomic_t num_events;
/* Used to avoid races in calling reserve/release_cpumf_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);

#define PMC_INIT      0
#define PMC_RELEASE   1
#define PMC_FAILURE   2
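/* setup_pmc_cpu() runs on each CPU via on_each_cpu() with interrupts
 * disabled.  The flags argument is both input and output: callers pass
 * PMC_INIT or PMC_RELEASE, and PMC_FAILURE is OR'ed into it if the
 * operation fails (checked by reserve_pmc_hardware() below).
 */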
static void setup_pmc_cpu(void *flags)
{
        int err;
        struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf);

        err = 0;
        switch (*((int *) flags)) {
        case PMC_INIT:
                memset(cpusf, 0, sizeof(*cpusf));
                err = qsi(&cpusf->qsi);
                if (err)
                        break;
                cpusf->flags |= PMU_F_RESERVED;
                err = sf_disable();
                if (err)
                        pr_err("Switching off the sampling facility failed "
                               "with rc=%i\n", err);
                debug_sprintf_event(sfdbg, 5,
                                    "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
                break;
        case PMC_RELEASE:
                cpusf->flags &= ~PMU_F_RESERVED;
                err = sf_disable();
                if (err) {
                        pr_err("Switching off the sampling facility failed "
                               "with rc=%i\n", err);
                } else
                        deallocate_buffers(cpusf);
                debug_sprintf_event(sfdbg, 5,
                                    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
                break;
        }
        if (err)
                *((int *) flags) |= PMC_FAILURE;
}

static void release_pmc_hardware(void)
{
        int flags = PMC_RELEASE;

        irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
        on_each_cpu(setup_pmc_cpu, &flags, 1);
}

static int reserve_pmc_hardware(void)
{
        int flags = PMC_INIT;

        on_each_cpu(setup_pmc_cpu, &flags, 1);
        if (flags & PMC_FAILURE) {
                release_pmc_hardware();
                return -ENODEV;
        }
        irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);

        return 0;
}

static void hw_perf_event_destroy(struct perf_event *event)
{
        /* Free raw sample buffer */
        if (RAWSAMPLE_REG(&event->hw))
                kfree((void *) RAWSAMPLE_REG(&event->hw));

        /* Release PMC if this is the last perf event */
        if (!atomic_add_unless(&num_events, -1, 1)) {
                mutex_lock(&pmc_reserve_mutex);
                if (atomic_dec_return(&num_events) == 0)
                        release_pmc_hardware();
                mutex_unlock(&pmc_reserve_mutex);
        }
}

static void hw_init_period(struct hw_perf_event *hwc, u64 period)
{
        hwc->sample_period = period;
        hwc->last_period = hwc->sample_period;
        local64_set(&hwc->period_left, hwc->sample_period);
}

static void hw_reset_registers(struct hw_perf_event *hwc,
                               unsigned long *sdbt_origin)
{
        struct sf_raw_sample *sfr;

        /* (Re)set to first sample-data-block-table */
        TEAR_REG(hwc) = (unsigned long) sdbt_origin;

        /* (Re)set raw sampling buffer register */
        sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
        memset(&sfr->basic, 0, sizeof(sfr->basic));
        memset(&sfr->diag, 0, sfr->dsdes);
}

static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
                                   unsigned long rate)
{
        return clamp_t(unsigned long, rate,
                       si->min_sampl_rate, si->max_sampl_rate);
}

static int __hw_perf_event_init(struct perf_event *event)
{
        struct cpu_hw_sf *cpuhw;
        struct hws_qsi_info_block si;
        struct perf_event_attr *attr = &event->attr;
        struct hw_perf_event *hwc = &event->hw;
        unsigned long rate;
        int cpu, err;

        /* Reserve CPU-measurement sampling facility */
        err = 0;
        if (!atomic_inc_not_zero(&num_events)) {
                mutex_lock(&pmc_reserve_mutex);
                if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
                        err = -EBUSY;
                else
                        atomic_inc(&num_events);
                mutex_unlock(&pmc_reserve_mutex);
        }
        event->destroy = hw_perf_event_destroy;

        if (err)
                goto out;

        /* Access per-CPU sampling information (query sampling info)
         *
         * The event->cpu value can be -1 to count on every CPU, for example,
         * when attaching to a task.  If this is specified, use the query
         * sampling info from the current CPU, otherwise use event->cpu to
         * retrieve the per-CPU information.
         * Later, cpuhw indicates whether to allocate sampling buffers for a
         * particular CPU (cpuhw!=NULL) or each online CPU (cpuhw==NULL).
         */
        memset(&si, 0, sizeof(si));
        cpuhw = NULL;
        if (event->cpu == -1)
                qsi(&si);
        else {
                /* Event is pinned to a particular CPU, retrieve the per-CPU
                 * sampling structure for accessing the CPU-specific QSI.
                 */
                cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
                si = cpuhw->qsi;
        }

        /* Check sampling facility authorization and, if not authorized,
         * fall back to other PMUs.  It is safe to check any CPU because
         * the authorization is identical for all configured CPUs.
         */
        if (!si.as) {
                err = -ENOENT;
                goto out;
        }

        /* Always enable basic sampling */
        SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;

        /* Check if diagnostic sampling is requested.  Deny if the required
         * sampling authorization is missing.
         */
        if (attr->config == PERF_EVENT_CPUM_SF_DIAG) {
                if (!si.ad) {
                        err = -EPERM;
                        goto out;
                }
                SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
        }

        /* Check and set other sampling flags */
        if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
                SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;

        /* The sampling information (si) contains information about the
         * min/max sampling intervals and the CPU speed.  So calculate the
         * correct sampling interval and avoid the whole period adjust
         * feedback loop.
         */
        rate = 0;
        if (attr->freq) {
                rate = freq_to_sample_rate(&si, attr->sample_freq);
                rate = hw_limit_rate(&si, rate);
                attr->freq = 0;
                attr->sample_period = rate;
        } else {
                /* The min/max sampling rates specify the valid range
                 * of sample periods.  If the specified sample period is
                 * out of range, limit the period to the range boundary.
                 */
                rate = hw_limit_rate(&si, hwc->sample_period);

                /* The perf core maintains a maximum sample rate that is
                 * configurable through the sysctl interface.  Ensure the
                 * sampling rate does not exceed this value.  This also helps
                 * to avoid throttling when pushing samples with
                 * perf_event_overflow().
                 */
                if (sample_rate_to_freq(&si, rate) >
                    sysctl_perf_event_sample_rate) {
                        err = -EINVAL;
                        debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
                        goto out;
                }
        }
        SAMPL_RATE(hwc) = rate;
        hw_init_period(hwc, SAMPL_RATE(hwc));

        /* Initialize sample data overflow accounting */
        hwc->extra_reg.reg = REG_OVERFLOW;
        OVERFLOW_REG(hwc) = 0;

        /* Allocate the per-CPU sampling buffer using the CPU information
         * from the event.  If the event is not pinned to a particular
         * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
         * buffers for each online CPU.
         */
        if (cpuhw)
                /* Event is pinned to a particular CPU */
                err = allocate_buffers(cpuhw, hwc);
        else {
                /* Event is not pinned, allocate sampling buffer on
                 * each online CPU
                 */
                for_each_online_cpu(cpu) {
                        cpuhw = &per_cpu(cpu_hw_sf, cpu);
                        err = allocate_buffers(cpuhw, hwc);
                        if (err)
                                break;
                }
        }
out:
        return err;
}

static int cpumsf_pmu_event_init(struct perf_event *event)
{
        int err;

        /* No support for taken branch sampling */
        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        switch (event->attr.type) {
        case PERF_TYPE_RAW:
                if ((event->attr.config != PERF_EVENT_CPUM_SF) &&
                    (event->attr.config != PERF_EVENT_CPUM_SF_DIAG))
                        return -ENOENT;
                break;
        case PERF_TYPE_HARDWARE:
                /* Support sampling of CPU cycles in addition to the
                 * counter facility.  However, the counter facility
                 * is more precise and, hence, restrict this PMU to
                 * sampling events only.
                 */
                if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
                        return -ENOENT;
                if (!is_sampling_event(event))
                        return -ENOENT;
                break;
        default:
                return -ENOENT;
        }

        /* Check online status of the CPU to which the event is pinned */
        if (event->cpu >= nr_cpumask_bits ||
            (event->cpu >= 0 && !cpu_online(event->cpu)))
                return -ENODEV;

        /* Force reset of idle/hv excludes regardless of what the
         * user requested.
         */
        if (event->attr.exclude_hv)
                event->attr.exclude_hv = 0;
        if (event->attr.exclude_idle)
                event->attr.exclude_idle = 0;

        err = __hw_perf_event_init(event);
        if (unlikely(err))
                if (event->destroy)
                        event->destroy(event);
        return err;
}

static void cpumsf_pmu_enable(struct pmu *pmu)
{
        struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
        struct hw_perf_event *hwc;
        int err;

        if (cpuhw->flags & PMU_F_ENABLED)
                return;

        if (cpuhw->flags & PMU_F_ERR_MASK)
                return;

        /* Check whether to extend the sampling buffer.
         *
         * Two conditions trigger an increase of the sampling buffer for a
         * perf event:
         *    1. Postponed buffer allocations from the event initialization.
         *    2. Sampling overflows that contribute to pending allocations.
         *
         * Note that the extend_sampling_buffer() function disables the sampling
         * facility, but it can be fully re-enabled using sampling controls that
         * have been saved in cpumsf_pmu_disable().
         */
        if (cpuhw->event) {
                hwc = &cpuhw->event->hw;
                /* Account number of overflow-designated buffer extents */
                sfb_account_overflows(cpuhw, hwc);
                if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
                        extend_sampling_buffer(&cpuhw->sfb, hwc);
        }

        /* (Re)enable the PMU and sampling facility */
        cpuhw->flags |= PMU_F_ENABLED;
        barrier();

        err = lsctl(&cpuhw->lsctl);
        if (err) {
                cpuhw->flags &= ~PMU_F_ENABLED;
                pr_err("Loading sampling controls failed: op=%i err=%i\n",
                       1, err);
                return;
        }

        debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
                            "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs,
                            cpuhw->lsctl.ed, cpuhw->lsctl.cd,
                            (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
}

static void cpumsf_pmu_disable(struct pmu *pmu)
{
        struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
        struct hws_lsctl_request_block inactive;
        struct hws_qsi_info_block si;
        int err;

        if (!(cpuhw->flags & PMU_F_ENABLED))
                return;

        if (cpuhw->flags & PMU_F_ERR_MASK)
                return;

        /* Switch off sampling activation control */
        inactive = cpuhw->lsctl;
        inactive.cs = 0;
        inactive.cd = 0;

        err = lsctl(&inactive);
        if (err) {
                pr_err("Loading sampling controls failed: op=%i err=%i\n",
                       2, err);
                return;
        }

        /* Save state of TEAR and DEAR register contents */
        err = qsi(&si);
        if (!err) {
                /* TEAR/DEAR values are valid only if the sampling facility is
                 * enabled.  Note that cpumsf_pmu_disable() might be called even
                 * for a disabled sampling facility because cpumsf_pmu_enable()
                 * controls the enable/disable state.
                 */
                if (si.es) {
                        cpuhw->lsctl.tear = si.tear;
                        cpuhw->lsctl.dear = si.dear;
                }
        } else
                debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
                                    "qsi() failed with err=%i\n", err);

        cpuhw->flags &= ~PMU_F_ENABLED;
}
/* perf_exclude_event() - Filter event
 * @event:	The perf event
 * @regs:	pt_regs structure
 * @sde_regs:	Sample-data-entry (sde) regs structure
 *
 * Filter perf events according to their exclude specification.
 *
 * Return non-zero if the event shall be excluded.
 */
static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
                              struct perf_sf_sde_regs *sde_regs)
{
        if (event->attr.exclude_user && user_mode(regs))
                return 1;
        if (event->attr.exclude_kernel && !user_mode(regs))
                return 1;
        if (event->attr.exclude_guest && sde_regs->in_guest)
                return 1;
        if (event->attr.exclude_host && !sde_regs->in_guest)
                return 1;
        return 0;
}

/* perf_push_sample() - Push samples to perf
 * @event:	The perf event
 * @sfr:	Raw sample data (sf_raw_sample)
 *
 * Use the hardware sample data to create a perf event sample.  The sample
 * is then pushed to the event subsystem and the function checks for
 * possible event overflows.  If an event overflow occurs, the PMU is
 * stopped.
 *
 * Return non-zero if an event overflow occurred.
 */
static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
{
        int overflow;
        struct pt_regs regs;
        struct perf_sf_sde_regs *sde_regs;
        struct perf_sample_data data;
        struct perf_raw_record raw = {
                .frag = {
                        .size = sfr->size,
                        .data = sfr,
                },
        };

        /* Setup perf sample */
        perf_sample_data_init(&data, 0, event->hw.last_period);
        data.raw = &raw;

        /* Setup pt_regs to look like a CPU-measurement external interrupt
         * using the Program Request Alert code.  The regs.int_parm_long
         * field, which is otherwise unused, carries additional
         * sample-data-entry related indicators.
         */
        memset(&regs, 0, sizeof(regs));
        regs.int_code = 0x1407;
        regs.int_parm = CPU_MF_INT_SF_PRA;
        sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;

        psw_bits(regs.psw).ia = sfr->basic.ia;
        psw_bits(regs.psw).t  = sfr->basic.T;
        psw_bits(regs.psw).w  = sfr->basic.W;
        psw_bits(regs.psw).p  = sfr->basic.P;
        psw_bits(regs.psw).as = sfr->basic.AS;

        /*
         * Use the hardware provided configuration level to decide if the
         * sample belongs to a guest or host.  If that is not available,
         * fall back to the following heuristics:
         * A non-zero guest program parameter always indicates a guest
         * sample.  Some early samples or samples from guests without
         * lpp usage would be misaccounted to the host.  We use the asn
         * value as an addon heuristic to detect most of these guest samples.
         * If the value differs from the host hpp value, we assume to be a
         * KVM guest.
         */
        switch (sfr->basic.CL) {
        case 1: /* logical partition */
                sde_regs->in_guest = 0;
                break;
        case 2: /* virtual machine */
                sde_regs->in_guest = 1;
                break;
        default: /* old machine, use heuristics */
                if (sfr->basic.gpp ||
                    sfr->basic.prim_asn != (u16)sfr->basic.hpp)
                        sde_regs->in_guest = 1;
                break;
        }

        overflow = 0;
        if (perf_exclude_event(event, &regs, sde_regs))
                goto out;
        if (perf_event_overflow(event, &data, &regs)) {
                overflow = 1;
                event->pmu->stop(event, 0);
        }
        perf_event_update_userpage(event);
out:
        return overflow;
}
static void perf_event_count_update(struct perf_event *event, u64 count)
{
        local64_add(count, &event->count);
}

static int sample_format_is_valid(struct hws_combined_entry *sample,
                                  unsigned int flags)
{
        if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
                /* Only basic-sampling data entries with data-entry-format
                 * version of 0x0001 can be processed.
                 */
                if (sample->basic.def != 0x0001)
                        return 0;
        if (flags & PERF_CPUM_SF_DIAG_MODE)
                /* The data-entry-format number of diagnostic-sampling data
                 * entries can vary.  Because diagnostic data is just passed
                 * through, do only a sanity check on the DEF.
                 */
                if (sample->diag.def < 0x8001)
                        return 0;
        return 1;
}

static int sample_is_consistent(struct hws_combined_entry *sample,
                                unsigned long flags)
{
        /* This check applies only to basic-sampling data entries of potentially
         * combined-sampling data entries.  Invalid entries cannot be processed
         * by the PMU and, thus, do not deliver an associated
         * diagnostic-sampling data entry.
         */
        if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE)))
                return 0;
        /*
         * Samples are skipped if they are invalid or if the instruction
         * address is not predictable, i.e., the wait-state bit is set.
         */
        if (sample->basic.I || sample->basic.W)
                return 0;
        return 1;
}

static void reset_sample_slot(struct hws_combined_entry *sample,
                              unsigned long flags)
{
        if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
                sample->basic.def = 0;
        if (flags & PERF_CPUM_SF_DIAG_MODE)
                sample->diag.def = 0;
}

static void sfr_store_sample(struct sf_raw_sample *sfr,
                             struct hws_combined_entry *sample)
{
        if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE))
                sfr->basic = sample->basic;
        if (sfr->format & PERF_CPUM_SF_DIAG_MODE)
                memcpy(&sfr->diag, &sample->diag, sfr->dsdes);
}

static void debug_sample_entry(struct hws_combined_entry *sample,
                               struct hws_trailer_entry *te,
                               unsigned long flags)
{
        debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
                            "sampling data entry: te->f=%i basic.def=%04x (%p)"
                            " diag.def=%04x (%p)\n", te->f,
                            sample->basic.def, &sample->basic,
                            (flags & PERF_CPUM_SF_DIAG_MODE)
                                        ? sample->diag.def : 0xFFFF,
                            (flags & PERF_CPUM_SF_DIAG_MODE)
                                        ? &sample->diag : NULL);
}

/* hw_collect_samples() - Walk through a sample-data-block and collect samples
 * @event:	The perf event
 * @sdbt:	Sample-data-block table
 * @overflow:	Event overflow counter
 *
 * Walks through a sample-data-block and collects sampling data entries that are
 * then pushed to the perf event subsystem.  Depending on the sampling function,
 * there can be either basic-sampling or combined-sampling data entries.  A
 * combined-sampling data entry consists of a basic- and a diagnostic-sampling
 * data entry.  The sampling function is determined by the flags in the perf
 * event hardware structure.  The function always works with a combined-sampling
 * data entry but ignores the diagnostic portion if it is not available.
 *
 * Note that the implementation focuses on basic-sampling data entries and, if
 * such an entry is not valid, the entire combined-sampling data entry is
 * ignored.
 *
 * The overflow variable counts the number of samples that have been discarded
 * due to a perf event overflow.
 */
static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
                               unsigned long long *overflow)
{
        unsigned long flags = SAMPL_FLAGS(&event->hw);
        struct hws_combined_entry *sample;
        struct hws_trailer_entry *te;
        struct sf_raw_sample *sfr;
        size_t sample_size;

        /* Prepare and initialize raw sample data */
        sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw);
        sfr->format = flags & PERF_CPUM_SF_MODE_MASK;

        sample_size = event_sample_size(&event->hw);
        te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
        sample = (struct hws_combined_entry *) *sdbt;
        while ((unsigned long *) sample < (unsigned long *) te) {
                /* Check for an empty sample */
                if (!sample->basic.def)
                        break;

                /* Update perf event period */
                perf_event_count_update(event, SAMPL_RATE(&event->hw));

                /* Check sampling data entry */
                if (sample_format_is_valid(sample, flags)) {
                        /* If an event overflow occurred, the PMU is stopped to
                         * throttle event delivery.  Remaining sample data is
                         * discarded.
                         */
                        if (!*overflow) {
                                if (sample_is_consistent(sample, flags)) {
                                        /* Deliver sample data to perf */
                                        sfr_store_sample(sfr, sample);
                                        *overflow = perf_push_sample(event, sfr);
                                }
                        } else
                                /* Count discarded samples */
                                *overflow += 1;
                } else {
                        debug_sample_entry(sample, te, flags);
                        /* Sample slot is not yet written or other record.
                         *
                         * This condition can occur if the buffer was reused
                         * from a combined basic- and diagnostic-sampling.
                         * If only basic-sampling is then active, entries are
                         * written into the larger diagnostic entries.
                         * This is typically the case for sample-data-blocks
                         * that are not full.  Stop processing if the first
                         * invalid format was detected.
                         */
                        if (!te->f)
                                break;
                }

                /* Reset sample slot and advance to next sample */
                reset_sample_slot(sample, flags);
                sample += sample_size;
        }
}
/* hw_perf_event_update() - Process sampling buffer
 * @event:	The perf event
 * @flush_all:	Flag to also flush partially filled sample-data-blocks
 *
 * Processes the sampling buffer and creates perf event samples.
 * The sampling buffer position is retrieved and saved in the TEAR_REG
 * register of the specified perf event.
 *
 * Only full sample-data-blocks are processed.  Specify the flush_all flag
 * to also walk through partially filled sample-data-blocks.  It is ignored
 * if PERF_CPUM_SF_FULL_BLOCKS is set.  The PERF_CPUM_SF_FULL_BLOCKS flag
 * enforces the processing of full sample-data-blocks only (trailer entries
 * with the block-full-indicator bit set).
 */
static void hw_perf_event_update(struct perf_event *event, int flush_all)
{
        struct hw_perf_event *hwc = &event->hw;
        struct hws_trailer_entry *te;
        unsigned long *sdbt;
        unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
        int done;

        if (flush_all && SDB_FULL_BLOCKS(hwc))
                flush_all = 0;

        sdbt = (unsigned long *) TEAR_REG(hwc);
        done = event_overflow = sampl_overflow = num_sdb = 0;
        while (!done) {
                /* Get the trailer entry of the sample-data-block */
                te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);

                /* Leave loop if no more work to do (block full indicator) */
                if (!te->f) {
                        done = 1;
                        if (!flush_all)
                                break;
                }

                /* Check the sample overflow count */
                if (te->overflow)
                        /* Account sample overflows and, if a particular limit
                         * is reached, extend the sampling buffer.
                         * For details, see sfb_account_overflows().
                         */
                        sampl_overflow += te->overflow;

                /* Timestamps are valid for full sample-data-blocks only */
                debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
                                    "overflow=%llu timestamp=0x%llx\n",
                                    sdbt, te->overflow,
                                    (te->f) ? trailer_timestamp(te) : 0ULL);

                /* Collect all samples from a single sample-data-block and
                 * flag if an (perf) event overflow happened.  If so, the PMU
                 * is stopped and remaining samples will be discarded.
                 */
                hw_collect_samples(event, sdbt, &event_overflow);
                num_sdb++;

                /* Reset trailer (using compare-double-and-swap) */
                do {
                        te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
                        te_flags |= SDB_TE_ALERT_REQ_MASK;
                } while (!cmpxchg_double(&te->flags, &te->overflow,
                                         te->flags, te->overflow,
                                         te_flags, 0ULL));

                /* Advance to next sample-data-block */
                sdbt++;
                if (is_link_entry(sdbt))
                        sdbt = get_next_sdbt(sdbt);

                /* Update event hardware registers */
                TEAR_REG(hwc) = (unsigned long) sdbt;

                /* Stop processing sample-data if all samples of the current
                 * sample-data-block were flushed even if it was not full.
                 */
                if (flush_all && done)
                        break;

                /* If an event overflow happened, discard samples by
                 * processing any remaining sample-data-blocks.
                 */
                if (event_overflow)
                        flush_all = 1;
        }

        /* Account sample overflows in the event hardware structure */
        if (sampl_overflow)
                OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
                                                 sampl_overflow, 1 + num_sdb);
        if (sampl_overflow || event_overflow)
                debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
                                    "overflow stats: sample=%llu event=%llu\n",
                                    sampl_overflow, event_overflow);
}
static void cpumsf_pmu_read(struct perf_event *event)
{
        /* Nothing to do ... updates are interrupt-driven */
}

/* Activate sampling control.
 * Next call of pmu_enable() starts sampling.
 */
static void cpumsf_pmu_start(struct perf_event *event, int flags)
{
        struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);

        if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
                return;

        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

        perf_pmu_disable(event->pmu);
        event->hw.state = 0;
        cpuhw->lsctl.cs = 1;
        if (SAMPL_DIAG_MODE(&event->hw))
                cpuhw->lsctl.cd = 1;
        perf_pmu_enable(event->pmu);
}
/* Deactivate sampling control.
 * Next call of pmu_enable() stops sampling.
 */
static void cpumsf_pmu_stop(struct perf_event *event, int flags)
{
        struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);

        if (event->hw.state & PERF_HES_STOPPED)
                return;

        perf_pmu_disable(event->pmu);
        cpuhw->lsctl.cs = 0;
        cpuhw->lsctl.cd = 0;
        event->hw.state |= PERF_HES_STOPPED;

        if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
                hw_perf_event_update(event, 1);
                event->hw.state |= PERF_HES_UPTODATE;
        }
        perf_pmu_enable(event->pmu);
}
static int cpumsf_pmu_add(struct perf_event *event, int flags)
{
        struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
        int err;

        if (cpuhw->flags & PMU_F_IN_USE)
                return -EAGAIN;

        if (!cpuhw->sfb.sdbt)
                return -EINVAL;

        err = 0;
        perf_pmu_disable(event->pmu);

        event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        /* Set up sampling controls.  Always program the sampling register
         * using the SDB-table start.  Reset TEAR_REG event hardware register
         * that is used by hw_perf_event_update() to store the sampling buffer
         * position after samples have been flushed.
         */
        cpuhw->lsctl.s = 0;
        cpuhw->lsctl.h = 1;
        cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
        cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
        cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
        hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);

        /* Ensure sampling functions are in the disabled state.  If disabled,
         * switch on sampling enable control. */
        if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) {
                err = -EAGAIN;
                goto out;
        }
        cpuhw->lsctl.es = 1;
        if (SAMPL_DIAG_MODE(&event->hw))
                cpuhw->lsctl.ed = 1;

        /* Set in_use flag and store event */
        cpuhw->event = event;
        cpuhw->flags |= PMU_F_IN_USE;

        if (flags & PERF_EF_START)
                cpumsf_pmu_start(event, PERF_EF_RELOAD);
out:
        perf_event_update_userpage(event);
        perf_pmu_enable(event->pmu);
        return err;
}
static void cpumsf_pmu_del(struct perf_event *event, int flags)
{
        struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);

        perf_pmu_disable(event->pmu);
        cpumsf_pmu_stop(event, PERF_EF_UPDATE);

        cpuhw->lsctl.es = 0;
        cpuhw->lsctl.ed = 0;
        cpuhw->flags &= ~PMU_F_IN_USE;
        cpuhw->event = NULL;

        perf_event_update_userpage(event);
        perf_pmu_enable(event->pmu);
}
CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);

static struct attribute *cpumsf_pmu_events_attr[] = {
	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
	NULL,
	NULL,
};

PMU_FORMAT_ATTR(event, "config:0-63");

static struct attribute *cpumsf_pmu_format_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group cpumsf_pmu_events_group = {
	.name = "events",
	.attrs = cpumsf_pmu_events_attr,
};
static struct attribute_group cpumsf_pmu_format_group = {
	.name = "format",
	.attrs = cpumsf_pmu_format_attr,
};
static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
	&cpumsf_pmu_events_group,
	&cpumsf_pmu_format_group,
	NULL,
};

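/* The cpum_sf PMU: state-change and event callbacks wired into the perf
 * core.  The PMU is registered with type PERF_TYPE_RAW, see
 * init_cpum_sampling_pmu() below.
 */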
static struct pmu cpumf_sampling = {
	.pmu_enable   = cpumsf_pmu_enable,
	.pmu_disable  = cpumsf_pmu_disable,

	.event_init   = cpumsf_pmu_event_init,
	.add	      = cpumsf_pmu_add,
	.del	      = cpumsf_pmu_del,

	.start	      = cpumsf_pmu_start,
	.stop	      = cpumsf_pmu_stop,
	.read	      = cpumsf_pmu_read,

	.attr_groups  = cpumsf_pmu_attr_groups,
};

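/* Handler for the measurement-alert external interrupt.  The alert mask
 * indicates which conditions are pending; several alerts can be flagged
 * in a single interrupt, so each bit is checked independently below.
 */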
static void cpumf_measurement_alert(struct ext_code ext_code,
				    unsigned int alert, unsigned long unused)
{
	struct cpu_hw_sf *cpuhw;

	if (!(alert & CPU_MF_INT_SF_MASK))
		return;
	inc_irq_stat(IRQEXT_CMS);
	cpuhw = this_cpu_ptr(&cpu_hw_sf);

	/* Measurement alerts are shared and might happen when the PMU
	 * is not reserved.  Ignore these alerts in this case. */
	if (!(cpuhw->flags & PMU_F_RESERVED))
		return;

	/* The processing below must take care of multiple alert events that
	 * might be indicated concurrently. */

	/* Program alert request */
	if (alert & CPU_MF_INT_SF_PRA) {
		if (cpuhw->flags & PMU_F_IN_USE)
			hw_perf_event_update(cpuhw->event, 0);
		else
			WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
	}

	/* Report measurement alerts only for non-PRA codes */
	if (alert != CPU_MF_INT_SF_PRA)
		debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n",
				    alert);

	/* Sampling authorization change request */
	if (alert & CPU_MF_INT_SF_SACA)
		qsi(&cpuhw->qsi);

	/* Loss of sample data due to high-priority machine activities */
	if (alert & CPU_MF_INT_SF_LSDA) {
		pr_err("Sample data was lost\n");
		cpuhw->flags |= PMU_F_ERR_LSDA;
		sf_disable();
	}

	/* Invalid sampling buffer entry */
	if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
		pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
		       alert);
		cpuhw->flags |= PMU_F_ERR_IBE;
		sf_disable();
	}
}

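/* CPU-hotplug callback: initialize (PMC_INIT) or release (PMC_RELEASE)
 * the per-CPU sampling facility state when a CPU comes online or goes
 * offline.  setup_pmc_cpu() runs with local interrupts disabled.
 */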
static int cpusf_pmu_setup(unsigned int cpu, int flags)
{
	/* Ignore the notification if no events are scheduled on the PMU.
	 * This might be racy...
	 */
	if (!atomic_read(&num_events))
		return 0;

	local_irq_disable();
	setup_pmc_cpu(&flags);
	local_irq_enable();
	return 0;
}

static int s390_pmu_sf_online_cpu(unsigned int cpu)
{
	return cpusf_pmu_setup(cpu, PMC_INIT);
}

static int s390_pmu_sf_offline_cpu(unsigned int cpu)
{
	return cpusf_pmu_setup(cpu, PMC_RELEASE);
}

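/* Show/store handlers for the cpum_sfb_size kernel parameter (see the
 * core_param() definition at the end of this file).  The getter reports
 * the current limits as "min,max"; the setter accepts either "max" alone
 * or "min,max".  For example (illustrative values only):
 *
 *	echo 32,2048 > /sys/module/kernel/parameters/cpum_sfb_size
 */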
static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
{
	if (!cpum_sf_avail())
		return -ENODEV;
	return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
}

static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
{
	int rc;
	unsigned long min, max;

	if (!cpum_sf_avail())
		return -ENODEV;
	if (!val || !strlen(val))
		return -EINVAL;

	/* Valid parameter values: "min,max" or "max" */
	min = CPUM_SF_MIN_SDB;
	max = CPUM_SF_MAX_SDB;
	if (strchr(val, ','))
		rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
	else
		rc = kstrtoul(val, 10, &max);

	if (min < 2 || min >= max || max > get_num_physpages())
		rc = -EINVAL;
	if (rc)
		return rc;

	sfb_set_limits(min, max);
	pr_info("The sampling buffer limits have changed to: "
		"min=%lu max=%lu (diag=x%lu)\n",
		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
	return 0;
}

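/* Define the custom "sfb_size" parameter type: the standard type check is
 * bypassed (__param_check() with a void pointer), and reads and writes are
 * dispatched through param_ops_sfb_size instead.
 */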
#define param_check_sfb_size(name, p) __param_check(name, p, void)
static const struct kernel_param_ops param_ops_sfb_size = {
	.set = param_set_sfb_size,
	.get = param_get_sfb_size,
};

#define RS_INIT_FAILURE_QSI	 0x0001
#define RS_INIT_FAILURE_BSDES	 0x0002
#define RS_INIT_FAILURE_ALRT	 0x0003
#define RS_INIT_FAILURE_PERF	 0x0004
static void __init pr_cpumsf_err(unsigned int reason)
{
	pr_err("Sampling facility support for perf is not available: "
	       "reason=%04x\n", reason);
}

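/* Initialize the sampling facility PMU: query the sampling information
 * block (QSI), validate the basic-sampling entry size, enable the
 * diagnostic-sampling event if authorized, and register the measurement
 * alert interrupt handler, the PMU, and the CPU-hotplug callbacks.
 */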
static int __init init_cpum_sampling_pmu(void)
{
	struct hws_qsi_info_block si;
	int err;

	if (!cpum_sf_avail())
		return -ENODEV;

	memset(&si, 0, sizeof(si));
	if (qsi(&si)) {
		pr_cpumsf_err(RS_INIT_FAILURE_QSI);
		return -ENODEV;
	}

	if (si.bsdes != sizeof(struct hws_basic_entry)) {
		pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
		return -EINVAL;
	}

	if (si.ad) {
		sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
		cpumsf_pmu_events_attr[1] =
			CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
	}

	sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
	if (!sfdbg)
		pr_err("Registering for s390dbf failed\n");
	debug_register_view(sfdbg, &debug_sprintf_view);

	err = register_external_irq(EXT_IRQ_MEASURE_ALERT,
				    cpumf_measurement_alert);
	if (err) {
		pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
		goto out;
	}

	err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
	if (err) {
		pr_cpumsf_err(RS_INIT_FAILURE_PERF);
		unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
					cpumf_measurement_alert);
		goto out;
	}

	cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "perf/s390/sf:online",
			  s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu);
out:
	return err;
}
arch_initcall(init_cpum_sampling_pmu);
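/* Expose the sampling buffer limits as the kernel parameter cpum_sfb_size,
 * for example "cpum_sfb_size=64,1024" on the kernel command line
 * (illustrative values; see param_set_sfb_size() for the accepted formats).
 */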
core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);