]>
Commit | Line | Data |
---|---|---|
8c069ff4 HB |
1 | /* |
2 | * Performance event support for the System z CPU-measurement Sampling Facility | |
3 | * | |
4 | * Copyright IBM Corp. 2013 | |
5 | * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License (version 2 only) | |
9 | * as published by the Free Software Foundation. | |
10 | */ | |
11 | #define KMSG_COMPONENT "cpum_sf" | |
12 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | |
13 | ||
14 | #include <linux/kernel.h> | |
15 | #include <linux/kernel_stat.h> | |
16 | #include <linux/perf_event.h> | |
17 | #include <linux/percpu.h> | |
18 | #include <linux/notifier.h> | |
19 | #include <linux/export.h> | |
7e75fc3f | 20 | #include <linux/slab.h> |
69f239ed HB |
21 | #include <linux/mm.h> |
22 | #include <linux/moduleparam.h> | |
8c069ff4 HB |
23 | #include <asm/cpu_mf.h> |
24 | #include <asm/irq.h> | |
25 | #include <asm/debug.h> | |
26 | #include <asm/timex.h> | |
27 | ||
28 | /* Minimum number of sample-data-block-tables: | |
29 | * At least one table is required for the sampling buffer structure. | |
30 | * A single table contains up to 511 pointers to sample-data-blocks. | |
31 | */ | |
69f239ed | 32 | #define CPUM_SF_MIN_SDBT 1 |
8c069ff4 | 33 | |
69f239ed | 34 | /* Number of sample-data-blocks per sample-data-block-table (SDBT): |
7e75fc3f HB |
35 | * A table contains SDB pointers (8 bytes) and one table-link entry |
36 | * that points to the origin of the next SDBT. | |
8c069ff4 | 37 | */ |
69f239ed | 38 | #define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8) |
8c069ff4 | 39 | |
69f239ed HB |
40 | /* Maximum page offset for an SDBT table-link entry: |
41 | * If this page offset is reached, a table-link entry to the next SDBT | |
42 | * must be added. | |
43 | */ | |
44 | #define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8) | |
45 | static inline int require_table_link(const void *sdbt) | |
46 | { | |
47 | return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET; | |
48 | } | |
49 | ||
50 | /* Minimum and maximum sampling buffer sizes: | |
51 | * | |
7e75fc3f HB |
52 | * This number represents the maximum size of the sampling buffer taking |
53 | * the number of sample-data-block-tables into account. Note that these | |
54 | * numbers apply to the basic-sampling function only. | |
55 | * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if | |
56 | * the diagnostic-sampling function is active. | |
8c069ff4 | 57 | * |
69f239ed HB |
58 | * Sampling buffer size Buffer characteristics |
59 | * --------------------------------------------------- | |
60 | * 64KB == 16 pages (4KB per page) | |
61 | * 1 page for SDB-tables | |
62 | * 15 pages for SDBs | |
63 | * | |
64 | * 32MB == 8192 pages (4KB per page) | |
65 | * 16 pages for SDB-tables | |
66 | * 8176 pages for SDBs | |
8c069ff4 | 67 | */ |
69f239ed HB |
68 | static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15; |
69 | static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176; | |
7e75fc3f | 70 | static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1; |
8c069ff4 HB |
71 | |
/* Bookkeeping for one sampling buffer: a chain of sample-data-block-tables
 * (SDBT) whose entries point to sample-data-blocks (SDB).
 */
struct sf_buffer {
	unsigned long *sdbt;	    /* Origin of the first SDBT */
	/* Buffer characteristics, needed when the buffer is extended */
	unsigned long num_sdb;	    /* Count of SDBs in the buffer */
	unsigned long num_sdbt;	    /* Count of SDBTs in the buffer */
	unsigned long *tail;	    /* Table-link entry of the last SDBT */
};
79 | ||
80 | struct cpu_hw_sf { | |
81 | /* CPU-measurement sampling information block */ | |
82 | struct hws_qsi_info_block qsi; | |
69f239ed | 83 | /* CPU-measurement sampling control block */ |
8c069ff4 HB |
84 | struct hws_lsctl_request_block lsctl; |
85 | struct sf_buffer sfb; /* Sampling buffer */ | |
86 | unsigned int flags; /* Status flags */ | |
87 | struct perf_event *event; /* Scheduled perf event */ | |
88 | }; | |
89 | static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); | |
90 | ||
91 | /* Debug feature */ | |
92 | static debug_info_t *sfdbg; | |
93 | ||
69f239ed HB |
94 | /* |
95 | * sf_disable() - Switch off sampling facility | |
96 | */ | |
97 | static int sf_disable(void) | |
98 | { | |
99 | struct hws_lsctl_request_block sreq; | |
100 | ||
101 | memset(&sreq, 0, sizeof(sreq)); | |
102 | return lsctl(&sreq); | |
103 | } | |
104 | ||
8c069ff4 HB |
105 | /* |
106 | * sf_buffer_available() - Check for an allocated sampling buffer | |
107 | */ | |
108 | static int sf_buffer_available(struct cpu_hw_sf *cpuhw) | |
109 | { | |
69f239ed | 110 | return !!cpuhw->sfb.sdbt; |
8c069ff4 HB |
111 | } |
112 | ||
113 | /* | |
114 | * deallocate sampling facility buffer | |
115 | */ | |
116 | static void free_sampling_buffer(struct sf_buffer *sfb) | |
117 | { | |
69f239ed | 118 | unsigned long *sdbt, *curr; |
8c069ff4 HB |
119 | |
120 | if (!sfb->sdbt) | |
121 | return; | |
122 | ||
123 | sdbt = sfb->sdbt; | |
69f239ed | 124 | curr = sdbt; |
8c069ff4 | 125 | |
69f239ed | 126 | /* Free the SDBT after all SDBs are processed... */ |
8c069ff4 HB |
127 | while (1) { |
128 | if (!*curr || !sdbt) | |
129 | break; | |
130 | ||
69f239ed | 131 | /* Process table-link entries */ |
8c069ff4 HB |
132 | if (is_link_entry(curr)) { |
133 | curr = get_next_sdbt(curr); | |
134 | if (sdbt) | |
69f239ed | 135 | free_page((unsigned long) sdbt); |
8c069ff4 | 136 | |
69f239ed HB |
137 | /* If the origin is reached, sampling buffer is freed */ |
138 | if (curr == sfb->sdbt) | |
8c069ff4 HB |
139 | break; |
140 | else | |
69f239ed | 141 | sdbt = curr; |
8c069ff4 | 142 | } else { |
69f239ed | 143 | /* Process SDB pointer */ |
8c069ff4 HB |
144 | if (*curr) { |
145 | free_page(*curr); | |
146 | curr++; | |
147 | } | |
148 | } | |
149 | } | |
150 | ||
151 | debug_sprintf_event(sfdbg, 5, | |
69f239ed | 152 | "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt); |
8c069ff4 HB |
153 | memset(sfb, 0, sizeof(*sfb)); |
154 | } | |
155 | ||
69f239ed HB |
156 | static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) |
157 | { | |
158 | unsigned long sdb, *trailer; | |
159 | ||
160 | /* Allocate and initialize sample-data-block */ | |
161 | sdb = get_zeroed_page(gfp_flags); | |
162 | if (!sdb) | |
163 | return -ENOMEM; | |
164 | trailer = trailer_entry_ptr(sdb); | |
165 | *trailer = SDB_TE_ALERT_REQ_MASK; | |
166 | ||
167 | /* Link SDB into the sample-data-block-table */ | |
168 | *sdbt = sdb; | |
169 | ||
170 | return 0; | |
171 | } | |
172 | ||
173 | /* | |
174 | * realloc_sampling_buffer() - extend sampler memory | |
175 | * | |
176 | * Allocates new sample-data-blocks and adds them to the specified sampling | |
177 | * buffer memory. | |
178 | * | |
179 | * Important: This modifies the sampling buffer and must be called when the | |
180 | * sampling facility is disabled. | |
181 | * | |
182 | * Returns zero on success, non-zero otherwise. | |
183 | */ | |
184 | static int realloc_sampling_buffer(struct sf_buffer *sfb, | |
185 | unsigned long num_sdb, gfp_t gfp_flags) | |
186 | { | |
187 | int i, rc; | |
188 | unsigned long *new, *tail; | |
189 | ||
190 | if (!sfb->sdbt || !sfb->tail) | |
191 | return -EINVAL; | |
192 | ||
193 | if (!is_link_entry(sfb->tail)) | |
194 | return -EINVAL; | |
195 | ||
196 | /* Append to the existing sampling buffer, overwriting the table-link | |
197 | * register. | |
198 | * The tail variables always points to the "tail" (last and table-link) | |
199 | * entry in an SDB-table. | |
200 | */ | |
201 | tail = sfb->tail; | |
202 | ||
203 | /* Do a sanity check whether the table-link entry points to | |
204 | * the sampling buffer origin. | |
205 | */ | |
206 | if (sfb->sdbt != get_next_sdbt(tail)) { | |
207 | debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: " | |
208 | "sampling buffer is not linked: origin=%p" | |
209 | "tail=%p\n", | |
210 | (void *) sfb->sdbt, (void *) tail); | |
211 | return -EINVAL; | |
212 | } | |
213 | ||
214 | /* Allocate remaining SDBs */ | |
215 | rc = 0; | |
216 | for (i = 0; i < num_sdb; i++) { | |
217 | /* Allocate a new SDB-table if it is full. */ | |
218 | if (require_table_link(tail)) { | |
219 | new = (unsigned long *) get_zeroed_page(gfp_flags); | |
220 | if (!new) { | |
221 | rc = -ENOMEM; | |
222 | break; | |
223 | } | |
224 | sfb->num_sdbt++; | |
225 | /* Link current page to tail of chain */ | |
226 | *tail = (unsigned long)(void *) new + 1; | |
227 | tail = new; | |
228 | } | |
229 | ||
230 | /* Allocate a new sample-data-block. | |
231 | * If there is not enough memory, stop the realloc process | |
232 | * and simply use what was allocated. If this is a temporary | |
233 | * issue, a new realloc call (if required) might succeed. | |
234 | */ | |
235 | rc = alloc_sample_data_block(tail, gfp_flags); | |
236 | if (rc) | |
237 | break; | |
238 | sfb->num_sdb++; | |
239 | tail++; | |
240 | } | |
241 | ||
242 | /* Link sampling buffer to its origin */ | |
243 | *tail = (unsigned long) sfb->sdbt + 1; | |
244 | sfb->tail = tail; | |
245 | ||
246 | debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer" | |
247 | " settings: sdbt=%lu sdb=%lu\n", | |
248 | sfb->num_sdbt, sfb->num_sdb); | |
249 | return rc; | |
250 | } | |
251 | ||
8c069ff4 HB |
252 | /* |
253 | * alloc_sampling_buffer() - allocate sampler memory | |
254 | * | |
255 | * Allocates and initializes a sampling buffer structure using the | |
256 | * specified number of sample-data-blocks (SDB). For each allocation, | |
257 | * a 4K page is used. The number of sample-data-block-tables (SDBT) | |
258 | * are calculated from SDBs. | |
259 | * Also set the ALERT_REQ mask in each SDBs trailer. | |
260 | * | |
261 | * Returns zero on success, non-zero otherwise. | |
262 | */ | |
263 | static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) | |
264 | { | |
69f239ed | 265 | int rc; |
8c069ff4 HB |
266 | |
267 | if (sfb->sdbt) | |
268 | return -EINVAL; | |
69f239ed HB |
269 | |
270 | /* Allocate the sample-data-block-table origin */ | |
271 | sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); | |
272 | if (!sfb->sdbt) | |
273 | return -ENOMEM; | |
8c069ff4 | 274 | sfb->num_sdb = 0; |
69f239ed | 275 | sfb->num_sdbt = 1; |
8c069ff4 | 276 | |
69f239ed HB |
277 | /* Link the table origin to point to itself to prepare for |
278 | * realloc_sampling_buffer() invocation. | |
279 | */ | |
280 | sfb->tail = sfb->sdbt; | |
281 | *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1; | |
8c069ff4 | 282 | |
69f239ed HB |
283 | /* Allocate requested number of sample-data-blocks */ |
284 | rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); | |
285 | if (rc) { | |
286 | free_sampling_buffer(sfb); | |
287 | debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: " | |
288 | "realloc_sampling_buffer failed with rc=%i\n", rc); | |
289 | } else | |
290 | debug_sprintf_event(sfdbg, 4, | |
291 | "alloc_sampling_buffer: tear=%p dear=%p\n", | |
292 | sfb->sdbt, (void *) *sfb->sdbt); | |
293 | return rc; | |
294 | } | |
8c069ff4 | 295 | |
69f239ed HB |
296 | static void sfb_set_limits(unsigned long min, unsigned long max) |
297 | { | |
7e75fc3f HB |
298 | struct hws_qsi_info_block si; |
299 | ||
69f239ed HB |
300 | CPUM_SF_MIN_SDB = min; |
301 | CPUM_SF_MAX_SDB = max; | |
7e75fc3f HB |
302 | |
303 | memset(&si, 0, sizeof(si)); | |
304 | if (!qsi(&si)) | |
305 | CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes); | |
306 | } | |
307 | ||
308 | static unsigned long sfb_max_limit(struct hw_perf_event *hwc) | |
309 | { | |
310 | return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR | |
311 | : CPUM_SF_MAX_SDB; | |
69f239ed | 312 | } |
8c069ff4 | 313 | |
69f239ed HB |
314 | static unsigned long sfb_pending_allocs(struct sf_buffer *sfb, |
315 | struct hw_perf_event *hwc) | |
316 | { | |
317 | if (!sfb->sdbt) | |
318 | return SFB_ALLOC_REG(hwc); | |
319 | if (SFB_ALLOC_REG(hwc) > sfb->num_sdb) | |
320 | return SFB_ALLOC_REG(hwc) - sfb->num_sdb; | |
321 | return 0; | |
322 | } | |
8c069ff4 | 323 | |
69f239ed HB |
/* Non-zero if at least one accounted SDB still has to be allocated */
static int sfb_has_pending_allocs(struct sf_buffer *sfb,
				  struct hw_perf_event *hwc)
{
	return sfb_pending_allocs(sfb, hwc) != 0;
}
8c069ff4 | 329 | |
69f239ed HB |
330 | static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc) |
331 | { | |
7e75fc3f HB |
332 | /* Limit the number of SDBs to not exceed the maximum */ |
333 | num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc)); | |
69f239ed HB |
334 | if (num) |
335 | SFB_ALLOC_REG(hwc) += num; | |
8c069ff4 HB |
336 | } |
337 | ||
69f239ed HB |
338 | static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) |
339 | { | |
340 | SFB_ALLOC_REG(hwc) = 0; | |
341 | sfb_account_allocs(num, hwc); | |
342 | } | |
343 | ||
7e75fc3f HB |
344 | static size_t event_sample_size(struct hw_perf_event *hwc) |
345 | { | |
346 | struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); | |
347 | size_t sample_size; | |
348 | ||
349 | /* The sample size depends on the sampling function: The basic-sampling | |
350 | * function must be always enabled, diagnostic-sampling function is | |
351 | * optional. | |
352 | */ | |
353 | sample_size = sfr->bsdes; | |
354 | if (SAMPL_DIAG_MODE(hwc)) | |
355 | sample_size += sfr->dsdes; | |
356 | ||
357 | return sample_size; | |
358 | } | |
359 | ||
360 | static void deallocate_buffers(struct cpu_hw_sf *cpuhw) | |
361 | { | |
362 | if (cpuhw->sfb.sdbt) | |
363 | free_sampling_buffer(&cpuhw->sfb); | |
364 | } | |
365 | ||
366 | static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) | |
8c069ff4 | 367 | { |
7e75fc3f HB |
368 | unsigned long n_sdb, freq, factor; |
369 | size_t sfr_size, sample_size; | |
370 | struct sf_raw_sample *sfr; | |
371 | ||
372 | /* Allocate raw sample buffer | |
373 | * | |
374 | * The raw sample buffer is used to temporarily store sampling data | |
375 | * entries for perf raw sample processing. The buffer size mainly | |
376 | * depends on the size of diagnostic-sampling data entries which is | |
377 | * machine-specific. The exact size calculation includes: | |
378 | * 1. The first 4 bytes of diagnostic-sampling data entries are | |
379 | * already reflected in the sf_raw_sample structure. Subtract | |
380 | * these bytes. | |
381 | * 2. The perf raw sample data must be 8-byte aligned (u64) and | |
382 | * perf's internal data size must be considered too. So add | |
383 | * an additional u32 for correct alignment and subtract before | |
384 | * allocating the buffer. | |
385 | * 3. Store the raw sample buffer pointer in the perf event | |
386 | * hardware structure. | |
387 | */ | |
388 | sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) + | |
389 | sizeof(u32), sizeof(u64)); | |
390 | sfr_size -= sizeof(u32); | |
391 | sfr = kzalloc(sfr_size, GFP_KERNEL); | |
392 | if (!sfr) | |
393 | return -ENOMEM; | |
394 | sfr->size = sfr_size; | |
395 | sfr->bsdes = cpuhw->qsi.bsdes; | |
396 | sfr->dsdes = cpuhw->qsi.dsdes; | |
397 | RAWSAMPLE_REG(hwc) = (unsigned long) sfr; | |
8c069ff4 HB |
398 | |
399 | /* Calculate sampling buffers using 4K pages | |
400 | * | |
7e75fc3f HB |
401 | * 1. Determine the sample data size which depends on the used |
402 | * sampling functions, for example, basic-sampling or | |
403 | * basic-sampling with diagnostic-sampling. | |
404 | * | |
405 | * 2. Use the sampling frequency as input. The sampling buffer is | |
406 | * designed for almost one second. This can be adjusted through | |
407 | * the "factor" variable. | |
8c069ff4 | 408 | * In any case, alloc_sampling_buffer() sets the Alert Request |
7e75fc3f | 409 | * Control indicator to trigger a measurement-alert to harvest |
8c069ff4 HB |
410 | * sample-data-blocks (sdb). |
411 | * | |
7e75fc3f | 412 | * 3. Compute the number of sample-data-blocks and ensure a minimum |
8c069ff4 | 413 | * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not |
7e75fc3f HB |
414 | * exceed a "calculated" maximum. The symbolic maximum is |
415 | * designed for basic-sampling only and needs to be increased if | |
416 | * diagnostic-sampling is active. | |
417 | * See also the remarks for these symbolic constants. | |
8c069ff4 | 418 | * |
7e75fc3f HB |
419 | * 4. Compute the number of sample-data-block-tables (SDBT) and |
420 | * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up | |
421 | * to 511 SDBs). | |
8c069ff4 | 422 | */ |
7e75fc3f | 423 | sample_size = event_sample_size(hwc); |
8c069ff4 HB |
424 | freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); |
425 | factor = 1; | |
7e75fc3f | 426 | n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size)); |
8c069ff4 HB |
427 | if (n_sdb < CPUM_SF_MIN_SDB) |
428 | n_sdb = CPUM_SF_MIN_SDB; | |
429 | ||
69f239ed HB |
430 | /* If there is already a sampling buffer allocated, it is very likely |
431 | * that the sampling facility is enabled too. If the event to be | |
432 | * initialized requires a greater sampling buffer, the allocation must | |
433 | * be postponed. Changing the sampling buffer requires the sampling | |
434 | * facility to be in the disabled state. So, account the number of | |
435 | * required SDBs and let cpumsf_pmu_enable() resize the buffer just | |
436 | * before the event is started. | |
8c069ff4 | 437 | */ |
69f239ed | 438 | sfb_init_allocs(n_sdb, hwc); |
8c069ff4 HB |
439 | if (sf_buffer_available(cpuhw)) |
440 | return 0; | |
441 | ||
442 | debug_sprintf_event(sfdbg, 3, | |
7e75fc3f HB |
443 | "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu" |
444 | " sample_size=%lu cpuhw=%p\n", | |
445 | SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), | |
446 | sample_size, cpuhw); | |
8c069ff4 HB |
447 | |
448 | return alloc_sampling_buffer(&cpuhw->sfb, | |
69f239ed | 449 | sfb_pending_allocs(&cpuhw->sfb, hwc)); |
8c069ff4 HB |
450 | } |
451 | ||
69f239ed HB |
452 | static unsigned long min_percent(unsigned int percent, unsigned long base, |
453 | unsigned long min) | |
454 | { | |
455 | return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100)); | |
456 | } | |
8c069ff4 | 457 | |
69f239ed HB |
/* Map the ratio of lost samples (in percent) to the number of SDBs by
 * which a buffer of @base SDBs is to be extended.
 */
static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
{
	unsigned int step;

	/* Use a percentage-based approach to extend the sampling facility
	 * buffer.  Accept up to 5% sample data loss.
	 * Vary the extents between 1% to 5% of the current number of
	 * sample-data-blocks.
	 */
	if (ratio <= 5)
		return 0;

	if (ratio <= 50)
		step = 1;
	else if (ratio <= 75)
		step = 2;
	else if (ratio <= 100)
		step = 3;
	else if (ratio <= 250)
		step = 4;
	else
		return min_percent(5, base, 8);

	/* For these steps, percentage and bound are the same number */
	return min_percent(step, base, step);
}
8c069ff4 | 480 | |
69f239ed HB |
481 | static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, |
482 | struct hw_perf_event *hwc) | |
483 | { | |
484 | unsigned long ratio, num; | |
485 | ||
486 | if (!OVERFLOW_REG(hwc)) | |
487 | return; | |
488 | ||
489 | /* The sample_overflow contains the average number of sample data | |
490 | * that has been lost because sample-data-blocks were full. | |
491 | * | |
492 | * Calculate the total number of sample data entries that has been | |
493 | * discarded. Then calculate the ratio of lost samples to total samples | |
494 | * per second in percent. | |
495 | */ | |
496 | ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb, | |
497 | sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc))); | |
498 | ||
499 | /* Compute number of sample-data-blocks */ | |
500 | num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb); | |
501 | if (num) | |
502 | sfb_account_allocs(num, hwc); | |
503 | ||
504 | debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu" | |
505 | " num=%lu\n", OVERFLOW_REG(hwc), ratio, num); | |
506 | OVERFLOW_REG(hwc) = 0; | |
507 | } | |
508 | ||
509 | /* extend_sampling_buffer() - Extend sampling buffer | |
510 | * @sfb: Sampling buffer structure (for local CPU) | |
511 | * @hwc: Perf event hardware structure | |
512 | * | |
513 | * Use this function to extend the sampling buffer based on the overflow counter | |
514 | * and postponed allocation extents stored in the specified Perf event hardware. | |
515 | * | |
516 | * Important: This function disables the sampling facility in order to safely | |
517 | * change the sampling buffer structure. Do not call this function | |
518 | * when the PMU is active. | |
8c069ff4 | 519 | */ |
69f239ed HB |
520 | static void extend_sampling_buffer(struct sf_buffer *sfb, |
521 | struct hw_perf_event *hwc) | |
8c069ff4 | 522 | { |
69f239ed HB |
523 | unsigned long num, num_old; |
524 | int rc; | |
8c069ff4 | 525 | |
69f239ed HB |
526 | num = sfb_pending_allocs(sfb, hwc); |
527 | if (!num) | |
528 | return; | |
529 | num_old = sfb->num_sdb; | |
530 | ||
531 | /* Disable the sampling facility to reset any states and also | |
532 | * clear pending measurement alerts. | |
533 | */ | |
534 | sf_disable(); | |
535 | ||
536 | /* Extend the sampling buffer. | |
537 | * This memory allocation typically happens in an atomic context when | |
538 | * called by perf. Because this is a reallocation, it is fine if the | |
539 | * new SDB-request cannot be satisfied immediately. | |
540 | */ | |
541 | rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); | |
542 | if (rc) | |
543 | debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc " | |
544 | "failed with rc=%i\n", rc); | |
545 | ||
546 | if (sfb_has_pending_allocs(sfb, hwc)) | |
547 | debug_sprintf_event(sfdbg, 5, "sfb: extend: " | |
548 | "req=%lu alloc=%lu remaining=%lu\n", | |
549 | num, sfb->num_sdb - num_old, | |
550 | sfb_pending_allocs(sfb, hwc)); | |
8c069ff4 HB |
551 | } |
552 | ||
553 | ||
69f239ed HB |
554 | /* Number of perf events counting hardware events */ |
555 | static atomic_t num_events; | |
556 | /* Used to avoid races in calling reserve/release_cpumf_hardware */ | |
557 | static DEFINE_MUTEX(pmc_reserve_mutex); | |
558 | ||
8c069ff4 HB |
559 | #define PMC_INIT 0 |
560 | #define PMC_RELEASE 1 | |
e28bb79d | 561 | #define PMC_FAILURE 2 |
8c069ff4 HB |
562 | static void setup_pmc_cpu(void *flags) |
563 | { | |
564 | int err; | |
eb7e7d76 | 565 | struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf); |
8c069ff4 | 566 | |
8c069ff4 HB |
567 | err = 0; |
568 | switch (*((int *) flags)) { | |
569 | case PMC_INIT: | |
570 | memset(cpusf, 0, sizeof(*cpusf)); | |
571 | err = qsi(&cpusf->qsi); | |
572 | if (err) | |
573 | break; | |
574 | cpusf->flags |= PMU_F_RESERVED; | |
575 | err = sf_disable(); | |
576 | if (err) | |
577 | pr_err("Switching off the sampling facility failed " | |
578 | "with rc=%i\n", err); | |
579 | debug_sprintf_event(sfdbg, 5, | |
580 | "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf); | |
581 | break; | |
582 | case PMC_RELEASE: | |
583 | cpusf->flags &= ~PMU_F_RESERVED; | |
584 | err = sf_disable(); | |
585 | if (err) { | |
586 | pr_err("Switching off the sampling facility failed " | |
587 | "with rc=%i\n", err); | |
7e75fc3f HB |
588 | } else |
589 | deallocate_buffers(cpusf); | |
8c069ff4 HB |
590 | debug_sprintf_event(sfdbg, 5, |
591 | "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); | |
592 | break; | |
593 | } | |
e28bb79d HB |
594 | if (err) |
595 | *((int *) flags) |= PMC_FAILURE; | |
8c069ff4 HB |
596 | } |
597 | ||
598 | static void release_pmc_hardware(void) | |
599 | { | |
600 | int flags = PMC_RELEASE; | |
601 | ||
602 | irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); | |
603 | on_each_cpu(setup_pmc_cpu, &flags, 1); | |
e28bb79d | 604 | perf_release_sampling(); |
8c069ff4 HB |
605 | } |
606 | ||
607 | static int reserve_pmc_hardware(void) | |
608 | { | |
609 | int flags = PMC_INIT; | |
e28bb79d | 610 | int err; |
8c069ff4 | 611 | |
e28bb79d HB |
612 | err = perf_reserve_sampling(); |
613 | if (err) | |
614 | return err; | |
8c069ff4 | 615 | on_each_cpu(setup_pmc_cpu, &flags, 1); |
e28bb79d HB |
616 | if (flags & PMC_FAILURE) { |
617 | release_pmc_hardware(); | |
618 | return -ENODEV; | |
619 | } | |
8c069ff4 HB |
620 | irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); |
621 | ||
622 | return 0; | |
623 | } | |
624 | ||
625 | static void hw_perf_event_destroy(struct perf_event *event) | |
626 | { | |
7e75fc3f HB |
627 | /* Free raw sample buffer */ |
628 | if (RAWSAMPLE_REG(&event->hw)) | |
629 | kfree((void *) RAWSAMPLE_REG(&event->hw)); | |
630 | ||
8c069ff4 HB |
631 | /* Release PMC if this is the last perf event */ |
632 | if (!atomic_add_unless(&num_events, -1, 1)) { | |
633 | mutex_lock(&pmc_reserve_mutex); | |
634 | if (atomic_dec_return(&num_events) == 0) | |
635 | release_pmc_hardware(); | |
636 | mutex_unlock(&pmc_reserve_mutex); | |
637 | } | |
638 | } | |
639 | ||
640 | static void hw_init_period(struct hw_perf_event *hwc, u64 period) | |
641 | { | |
642 | hwc->sample_period = period; | |
643 | hwc->last_period = hwc->sample_period; | |
644 | local64_set(&hwc->period_left, hwc->sample_period); | |
645 | } | |
646 | ||
647 | static void hw_reset_registers(struct hw_perf_event *hwc, | |
69f239ed | 648 | unsigned long *sdbt_origin) |
8c069ff4 | 649 | { |
7e75fc3f HB |
650 | struct sf_raw_sample *sfr; |
651 | ||
69f239ed HB |
652 | /* (Re)set to first sample-data-block-table */ |
653 | TEAR_REG(hwc) = (unsigned long) sdbt_origin; | |
7e75fc3f HB |
654 | |
655 | /* (Re)set raw sampling buffer register */ | |
656 | sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); | |
657 | memset(&sfr->basic, 0, sizeof(sfr->basic)); | |
658 | memset(&sfr->diag, 0, sfr->dsdes); | |
8c069ff4 HB |
659 | } |
660 | ||
661 | static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, | |
662 | unsigned long rate) | |
663 | { | |
69f239ed HB |
664 | return clamp_t(unsigned long, rate, |
665 | si->min_sampl_rate, si->max_sampl_rate); | |
8c069ff4 HB |
666 | } |
667 | ||
668 | static int __hw_perf_event_init(struct perf_event *event) | |
669 | { | |
670 | struct cpu_hw_sf *cpuhw; | |
671 | struct hws_qsi_info_block si; | |
672 | struct perf_event_attr *attr = &event->attr; | |
673 | struct hw_perf_event *hwc = &event->hw; | |
674 | unsigned long rate; | |
675 | int cpu, err; | |
676 | ||
677 | /* Reserve CPU-measurement sampling facility */ | |
678 | err = 0; | |
679 | if (!atomic_inc_not_zero(&num_events)) { | |
680 | mutex_lock(&pmc_reserve_mutex); | |
681 | if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) | |
682 | err = -EBUSY; | |
683 | else | |
684 | atomic_inc(&num_events); | |
685 | mutex_unlock(&pmc_reserve_mutex); | |
686 | } | |
687 | event->destroy = hw_perf_event_destroy; | |
688 | ||
689 | if (err) | |
690 | goto out; | |
691 | ||
692 | /* Access per-CPU sampling information (query sampling info) */ | |
693 | /* | |
694 | * The event->cpu value can be -1 to count on every CPU, for example, | |
695 | * when attaching to a task. If this is specified, use the query | |
696 | * sampling info from the current CPU, otherwise use event->cpu to | |
697 | * retrieve the per-CPU information. | |
698 | * Later, cpuhw indicates whether to allocate sampling buffers for a | |
699 | * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL). | |
700 | */ | |
701 | memset(&si, 0, sizeof(si)); | |
702 | cpuhw = NULL; | |
703 | if (event->cpu == -1) | |
704 | qsi(&si); | |
705 | else { | |
706 | /* Event is pinned to a particular CPU, retrieve the per-CPU | |
707 | * sampling structure for accessing the CPU-specific QSI. | |
708 | */ | |
709 | cpuhw = &per_cpu(cpu_hw_sf, event->cpu); | |
710 | si = cpuhw->qsi; | |
711 | } | |
712 | ||
713 | /* Check sampling facility authorization and, if not authorized, | |
714 | * fall back to other PMUs. It is safe to check any CPU because | |
715 | * the authorization is identical for all configured CPUs. | |
716 | */ | |
717 | if (!si.as) { | |
718 | err = -ENOENT; | |
719 | goto out; | |
720 | } | |
721 | ||
7e75fc3f HB |
722 | /* Always enable basic sampling */ |
723 | SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; | |
724 | ||
725 | /* Check if diagnostic sampling is requested. Deny if the required | |
726 | * sampling authorization is missing. | |
727 | */ | |
728 | if (attr->config == PERF_EVENT_CPUM_SF_DIAG) { | |
729 | if (!si.ad) { | |
730 | err = -EPERM; | |
731 | goto out; | |
732 | } | |
733 | SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE; | |
734 | } | |
735 | ||
d7528862 HB |
736 | /* Check and set other sampling flags */ |
737 | if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS) | |
738 | SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS; | |
739 | ||
8c069ff4 HB |
740 | /* The sampling information (si) contains information about the |
741 | * min/max sampling intervals and the CPU speed. So calculate the | |
742 | * correct sampling interval and avoid the whole period adjust | |
743 | * feedback loop. | |
744 | */ | |
745 | rate = 0; | |
746 | if (attr->freq) { | |
747 | rate = freq_to_sample_rate(&si, attr->sample_freq); | |
748 | rate = hw_limit_rate(&si, rate); | |
749 | attr->freq = 0; | |
750 | attr->sample_period = rate; | |
751 | } else { | |
752 | /* The min/max sampling rates specifies the valid range | |
753 | * of sample periods. If the specified sample period is | |
754 | * out of range, limit the period to the range boundary. | |
755 | */ | |
756 | rate = hw_limit_rate(&si, hwc->sample_period); | |
757 | ||
758 | /* The perf core maintains a maximum sample rate that is | |
759 | * configurable through the sysctl interface. Ensure the | |
760 | * sampling rate does not exceed this value. This also helps | |
761 | * to avoid throttling when pushing samples with | |
762 | * perf_event_overflow(). | |
763 | */ | |
764 | if (sample_rate_to_freq(&si, rate) > | |
765 | sysctl_perf_event_sample_rate) { | |
766 | err = -EINVAL; | |
767 | debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n"); | |
768 | goto out; | |
769 | } | |
770 | } | |
771 | SAMPL_RATE(hwc) = rate; | |
772 | hw_init_period(hwc, SAMPL_RATE(hwc)); | |
773 | ||
69f239ed HB |
774 | /* Initialize sample data overflow accounting */ |
775 | hwc->extra_reg.reg = REG_OVERFLOW; | |
776 | OVERFLOW_REG(hwc) = 0; | |
777 | ||
8c069ff4 HB |
778 | /* Allocate the per-CPU sampling buffer using the CPU information |
779 | * from the event. If the event is not pinned to a particular | |
780 | * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling | |
781 | * buffers for each online CPU. | |
782 | */ | |
783 | if (cpuhw) | |
784 | /* Event is pinned to a particular CPU */ | |
7e75fc3f | 785 | err = allocate_buffers(cpuhw, hwc); |
8c069ff4 HB |
786 | else { |
787 | /* Event is not pinned, allocate sampling buffer on | |
788 | * each online CPU | |
789 | */ | |
790 | for_each_online_cpu(cpu) { | |
791 | cpuhw = &per_cpu(cpu_hw_sf, cpu); | |
7e75fc3f | 792 | err = allocate_buffers(cpuhw, hwc); |
8c069ff4 HB |
793 | if (err) |
794 | break; | |
795 | } | |
796 | } | |
797 | out: | |
798 | return err; | |
799 | } | |
800 | ||
801 | static int cpumsf_pmu_event_init(struct perf_event *event) | |
802 | { | |
803 | int err; | |
804 | ||
55baa2f8 HB |
805 | /* No support for taken branch sampling */ |
806 | if (has_branch_stack(event)) | |
807 | return -EOPNOTSUPP; | |
808 | ||
809 | switch (event->attr.type) { | |
810 | case PERF_TYPE_RAW: | |
7e75fc3f HB |
811 | if ((event->attr.config != PERF_EVENT_CPUM_SF) && |
812 | (event->attr.config != PERF_EVENT_CPUM_SF_DIAG)) | |
55baa2f8 HB |
813 | return -ENOENT; |
814 | break; | |
815 | case PERF_TYPE_HARDWARE: | |
816 | /* Support sampling of CPU cycles in addition to the | |
817 | * counter facility. However, the counter facility | |
818 | * is more precise and, hence, restrict this PMU to | |
819 | * sampling events only. | |
820 | */ | |
821 | if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES) | |
822 | return -ENOENT; | |
823 | if (!is_sampling_event(event)) | |
824 | return -ENOENT; | |
825 | break; | |
826 | default: | |
8c069ff4 | 827 | return -ENOENT; |
55baa2f8 | 828 | } |
8c069ff4 | 829 | |
dd127b3b | 830 | /* Check online status of the CPU to which the event is pinned */ |
8c069ff4 HB |
831 | if (event->cpu >= nr_cpumask_bits || |
832 | (event->cpu >= 0 && !cpu_online(event->cpu))) | |
833 | return -ENODEV; | |
834 | ||
dd127b3b HB |
835 | /* Force reset of idle/hv excludes regardless of what the |
836 | * user requested. | |
837 | */ | |
838 | if (event->attr.exclude_hv) | |
839 | event->attr.exclude_hv = 0; | |
840 | if (event->attr.exclude_idle) | |
841 | event->attr.exclude_idle = 0; | |
842 | ||
8c069ff4 HB |
843 | err = __hw_perf_event_init(event); |
844 | if (unlikely(err)) | |
845 | if (event->destroy) | |
846 | event->destroy(event); | |
847 | return err; | |
848 | } | |
849 | ||
850 | static void cpumsf_pmu_enable(struct pmu *pmu) | |
851 | { | |
eb7e7d76 | 852 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
69f239ed | 853 | struct hw_perf_event *hwc; |
8c069ff4 HB |
854 | int err; |
855 | ||
856 | if (cpuhw->flags & PMU_F_ENABLED) | |
857 | return; | |
858 | ||
859 | if (cpuhw->flags & PMU_F_ERR_MASK) | |
860 | return; | |
861 | ||
69f239ed HB |
862 | /* Check whether to extent the sampling buffer. |
863 | * | |
864 | * Two conditions trigger an increase of the sampling buffer for a | |
865 | * perf event: | |
866 | * 1. Postponed buffer allocations from the event initialization. | |
867 | * 2. Sampling overflows that contribute to pending allocations. | |
868 | * | |
869 | * Note that the extend_sampling_buffer() function disables the sampling | |
870 | * facility, but it can be fully re-enabled using sampling controls that | |
871 | * have been saved in cpumsf_pmu_disable(). | |
872 | */ | |
873 | if (cpuhw->event) { | |
874 | hwc = &cpuhw->event->hw; | |
875 | /* Account number of overflow-designated buffer extents */ | |
876 | sfb_account_overflows(cpuhw, hwc); | |
877 | if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) | |
878 | extend_sampling_buffer(&cpuhw->sfb, hwc); | |
879 | } | |
880 | ||
881 | /* (Re)enable the PMU and sampling facility */ | |
8c069ff4 HB |
882 | cpuhw->flags |= PMU_F_ENABLED; |
883 | barrier(); | |
884 | ||
885 | err = lsctl(&cpuhw->lsctl); | |
886 | if (err) { | |
887 | cpuhw->flags &= ~PMU_F_ENABLED; | |
888 | pr_err("Loading sampling controls failed: op=%i err=%i\n", | |
889 | 1, err); | |
890 | return; | |
891 | } | |
892 | ||
7e75fc3f HB |
893 | debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " |
894 | "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, | |
895 | cpuhw->lsctl.ed, cpuhw->lsctl.cd, | |
8c069ff4 HB |
896 | (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear); |
897 | } | |
898 | ||
899 | static void cpumsf_pmu_disable(struct pmu *pmu) | |
900 | { | |
eb7e7d76 | 901 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
8c069ff4 HB |
902 | struct hws_lsctl_request_block inactive; |
903 | struct hws_qsi_info_block si; | |
904 | int err; | |
905 | ||
906 | if (!(cpuhw->flags & PMU_F_ENABLED)) | |
907 | return; | |
908 | ||
909 | if (cpuhw->flags & PMU_F_ERR_MASK) | |
910 | return; | |
911 | ||
912 | /* Switch off sampling activation control */ | |
913 | inactive = cpuhw->lsctl; | |
914 | inactive.cs = 0; | |
7e75fc3f | 915 | inactive.cd = 0; |
8c069ff4 HB |
916 | |
917 | err = lsctl(&inactive); | |
918 | if (err) { | |
919 | pr_err("Loading sampling controls failed: op=%i err=%i\n", | |
920 | 2, err); | |
921 | return; | |
922 | } | |
923 | ||
924 | /* Save state of TEAR and DEAR register contents */ | |
925 | if (!qsi(&si)) { | |
926 | /* TEAR/DEAR values are valid only if the sampling facility is | |
927 | * enabled. Note that cpumsf_pmu_disable() might be called even | |
928 | * for a disabled sampling facility because cpumsf_pmu_enable() | |
929 | * controls the enable/disable state. | |
930 | */ | |
931 | if (si.es) { | |
932 | cpuhw->lsctl.tear = si.tear; | |
933 | cpuhw->lsctl.dear = si.dear; | |
934 | } | |
935 | } else | |
936 | debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: " | |
937 | "qsi() failed with err=%i\n", err); | |
938 | ||
939 | cpuhw->flags &= ~PMU_F_ENABLED; | |
940 | } | |
941 | ||
dd127b3b HB |
942 | /* perf_exclude_event() - Filter event |
943 | * @event: The perf event | |
944 | * @regs: pt_regs structure | |
945 | * @sde_regs: Sample-data-entry (sde) regs structure | |
946 | * | |
947 | * Filter perf events according to their exclude specification. | |
948 | * | |
949 | * Return non-zero if the event shall be excluded. | |
950 | */ | |
951 | static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, | |
952 | struct perf_sf_sde_regs *sde_regs) | |
953 | { | |
954 | if (event->attr.exclude_user && user_mode(regs)) | |
955 | return 1; | |
956 | if (event->attr.exclude_kernel && !user_mode(regs)) | |
957 | return 1; | |
958 | if (event->attr.exclude_guest && sde_regs->in_guest) | |
959 | return 1; | |
960 | if (event->attr.exclude_host && !sde_regs->in_guest) | |
961 | return 1; | |
962 | return 0; | |
963 | } | |
964 | ||
8c069ff4 HB |
965 | /* perf_push_sample() - Push samples to perf |
966 | * @event: The perf event | |
967 | * @sample: Hardware sample data | |
968 | * | |
969 | * Use the hardware sample data to create perf event sample. The sample | |
970 | * is the pushed to the event subsystem and the function checks for | |
971 | * possible event overflows. If an event overflow occurs, the PMU is | |
972 | * stopped. | |
973 | * | |
974 | * Return non-zero if an event overflow occurred. | |
975 | */ | |
7e75fc3f | 976 | static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) |
8c069ff4 HB |
977 | { |
978 | int overflow; | |
979 | struct pt_regs regs; | |
443e802b | 980 | struct perf_sf_sde_regs *sde_regs; |
8c069ff4 | 981 | struct perf_sample_data data; |
7e75fc3f | 982 | struct perf_raw_record raw; |
8c069ff4 | 983 | |
7e75fc3f | 984 | /* Setup perf sample */ |
8c069ff4 | 985 | perf_sample_data_init(&data, 0, event->hw.last_period); |
7e75fc3f HB |
986 | raw.size = sfr->size; |
987 | raw.data = sfr; | |
988 | data.raw = &raw; | |
8c069ff4 | 989 | |
443e802b HB |
990 | /* Setup pt_regs to look like an CPU-measurement external interrupt |
991 | * using the Program Request Alert code. The regs.int_parm_long | |
992 | * field which is unused contains additional sample-data-entry related | |
993 | * indicators. | |
994 | */ | |
8c069ff4 | 995 | memset(®s, 0, sizeof(regs)); |
443e802b HB |
996 | regs.int_code = 0x1407; |
997 | regs.int_parm = CPU_MF_INT_SF_PRA; | |
998 | sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; | |
999 | ||
7e75fc3f HB |
1000 | regs.psw.addr = sfr->basic.ia; |
1001 | if (sfr->basic.T) | |
8c069ff4 | 1002 | regs.psw.mask |= PSW_MASK_DAT; |
7e75fc3f | 1003 | if (sfr->basic.W) |
8c069ff4 | 1004 | regs.psw.mask |= PSW_MASK_WAIT; |
7e75fc3f | 1005 | if (sfr->basic.P) |
8c069ff4 | 1006 | regs.psw.mask |= PSW_MASK_PSTATE; |
7e75fc3f | 1007 | switch (sfr->basic.AS) { |
8c069ff4 HB |
1008 | case 0x0: |
1009 | regs.psw.mask |= PSW_ASC_PRIMARY; | |
1010 | break; | |
1011 | case 0x1: | |
1012 | regs.psw.mask |= PSW_ASC_ACCREG; | |
1013 | break; | |
1014 | case 0x2: | |
1015 | regs.psw.mask |= PSW_ASC_SECONDARY; | |
1016 | break; | |
1017 | case 0x3: | |
1018 | regs.psw.mask |= PSW_ASC_HOME; | |
1019 | break; | |
1020 | } | |
1021 | ||
443e802b HB |
1022 | /* The host-program-parameter (hpp) contains the sie control |
1023 | * block that is set by sie64a() in entry64.S. Check if hpp | |
1024 | * refers to a valid control block and set sde_regs flags | |
1025 | * accordingly. This would allow to use hpp values for other | |
1026 | * purposes too. | |
1027 | * For now, simply use a non-zero value as guest indicator. | |
1028 | */ | |
7e75fc3f | 1029 | if (sfr->basic.hpp) |
443e802b HB |
1030 | sde_regs->in_guest = 1; |
1031 | ||
8c069ff4 | 1032 | overflow = 0; |
dd127b3b HB |
1033 | if (perf_exclude_event(event, ®s, sde_regs)) |
1034 | goto out; | |
8c069ff4 HB |
1035 | if (perf_event_overflow(event, &data, ®s)) { |
1036 | overflow = 1; | |
1037 | event->pmu->stop(event, 0); | |
8c069ff4 HB |
1038 | } |
1039 | perf_event_update_userpage(event); | |
dd127b3b | 1040 | out: |
8c069ff4 HB |
1041 | return overflow; |
1042 | } | |
1043 | ||
1044 | static void perf_event_count_update(struct perf_event *event, u64 count) | |
1045 | { | |
1046 | local64_add(count, &event->count); | |
1047 | } | |
1048 | ||
7e75fc3f HB |
1049 | static int sample_format_is_valid(struct hws_combined_entry *sample, |
1050 | unsigned int flags) | |
1051 | { | |
1052 | if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) | |
1053 | /* Only basic-sampling data entries with data-entry-format | |
1054 | * version of 0x0001 can be processed. | |
1055 | */ | |
1056 | if (sample->basic.def != 0x0001) | |
1057 | return 0; | |
1058 | if (flags & PERF_CPUM_SF_DIAG_MODE) | |
1059 | /* The data-entry-format number of diagnostic-sampling data | |
1060 | * entries can vary. Because diagnostic data is just passed | |
1061 | * through, do only a sanity check on the DEF. | |
1062 | */ | |
1063 | if (sample->diag.def < 0x8001) | |
1064 | return 0; | |
1065 | return 1; | |
1066 | } | |
1067 | ||
1068 | static int sample_is_consistent(struct hws_combined_entry *sample, | |
1069 | unsigned long flags) | |
1070 | { | |
1071 | /* This check applies only to basic-sampling data entries of potentially | |
1072 | * combined-sampling data entries. Invalid entries cannot be processed | |
1073 | * by the PMU and, thus, do not deliver an associated | |
1074 | * diagnostic-sampling data entry. | |
1075 | */ | |
1076 | if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE))) | |
1077 | return 0; | |
1078 | /* | |
1079 | * Samples are skipped, if they are invalid or for which the | |
1080 | * instruction address is not predictable, i.e., the wait-state bit is | |
1081 | * set. | |
1082 | */ | |
1083 | if (sample->basic.I || sample->basic.W) | |
1084 | return 0; | |
1085 | return 1; | |
1086 | } | |
1087 | ||
1088 | static void reset_sample_slot(struct hws_combined_entry *sample, | |
1089 | unsigned long flags) | |
1090 | { | |
1091 | if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) | |
1092 | sample->basic.def = 0; | |
1093 | if (flags & PERF_CPUM_SF_DIAG_MODE) | |
1094 | sample->diag.def = 0; | |
1095 | } | |
1096 | ||
1097 | static void sfr_store_sample(struct sf_raw_sample *sfr, | |
1098 | struct hws_combined_entry *sample) | |
1099 | { | |
1100 | if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE)) | |
1101 | sfr->basic = sample->basic; | |
1102 | if (sfr->format & PERF_CPUM_SF_DIAG_MODE) | |
1103 | memcpy(&sfr->diag, &sample->diag, sfr->dsdes); | |
1104 | } | |
1105 | ||
1106 | static void debug_sample_entry(struct hws_combined_entry *sample, | |
1107 | struct hws_trailer_entry *te, | |
1108 | unsigned long flags) | |
1109 | { | |
1110 | debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " | |
1111 | "sampling data entry: te->f=%i basic.def=%04x (%p)" | |
1112 | " diag.def=%04x (%p)\n", te->f, | |
1113 | sample->basic.def, &sample->basic, | |
1114 | (flags & PERF_CPUM_SF_DIAG_MODE) | |
1115 | ? sample->diag.def : 0xFFFF, | |
1116 | (flags & PERF_CPUM_SF_DIAG_MODE) | |
1117 | ? &sample->diag : NULL); | |
1118 | } | |
1119 | ||
8c069ff4 HB |
1120 | /* hw_collect_samples() - Walk through a sample-data-block and collect samples |
1121 | * @event: The perf event | |
1122 | * @sdbt: Sample-data-block table | |
1123 | * @overflow: Event overflow counter | |
1124 | * | |
7e75fc3f HB |
1125 | * Walks through a sample-data-block and collects sampling data entries that are |
1126 | * then pushed to the perf event subsystem. Depending on the sampling function, | |
1127 | * there can be either basic-sampling or combined-sampling data entries. A | |
1128 | * combined-sampling data entry consists of a basic- and a diagnostic-sampling | |
1129 | * data entry. The sampling function is determined by the flags in the perf | |
1130 | * event hardware structure. The function always works with a combined-sampling | |
1131 | * data entry but ignores the the diagnostic portion if it is not available. | |
1132 | * | |
1133 | * Note that the implementation focuses on basic-sampling data entries and, if | |
1134 | * such an entry is not valid, the entire combined-sampling data entry is | |
1135 | * ignored. | |
1136 | * | |
1137 | * The overflow variables counts the number of samples that has been discarded | |
1138 | * due to a perf event overflow. | |
8c069ff4 HB |
1139 | */ |
1140 | static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, | |
1141 | unsigned long long *overflow) | |
1142 | { | |
7e75fc3f HB |
1143 | unsigned long flags = SAMPL_FLAGS(&event->hw); |
1144 | struct hws_combined_entry *sample; | |
1145 | struct hws_trailer_entry *te; | |
1146 | struct sf_raw_sample *sfr; | |
1147 | size_t sample_size; | |
1148 | ||
1149 | /* Prepare and initialize raw sample data */ | |
1150 | sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw); | |
1151 | sfr->format = flags & PERF_CPUM_SF_MODE_MASK; | |
8c069ff4 | 1152 | |
7e75fc3f HB |
1153 | sample_size = event_sample_size(&event->hw); |
1154 | te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); | |
1155 | sample = (struct hws_combined_entry *) *sdbt; | |
1156 | while ((unsigned long *) sample < (unsigned long *) te) { | |
8c069ff4 | 1157 | /* Check for an empty sample */ |
7e75fc3f | 1158 | if (!sample->basic.def) |
8c069ff4 HB |
1159 | break; |
1160 | ||
1161 | /* Update perf event period */ | |
1162 | perf_event_count_update(event, SAMPL_RATE(&event->hw)); | |
1163 | ||
7e75fc3f HB |
1164 | /* Check sampling data entry */ |
1165 | if (sample_format_is_valid(sample, flags)) { | |
8c069ff4 HB |
1166 | /* If an event overflow occurred, the PMU is stopped to |
1167 | * throttle event delivery. Remaining sample data is | |
1168 | * discarded. | |
1169 | */ | |
7e75fc3f HB |
1170 | if (!*overflow) { |
1171 | if (sample_is_consistent(sample, flags)) { | |
1172 | /* Deliver sample data to perf */ | |
1173 | sfr_store_sample(sfr, sample); | |
1174 | *overflow = perf_push_sample(event, sfr); | |
1175 | } | |
1176 | } else | |
8c069ff4 HB |
1177 | /* Count discarded samples */ |
1178 | *overflow += 1; | |
7e75fc3f HB |
1179 | } else { |
1180 | debug_sample_entry(sample, te, flags); | |
1181 | /* Sample slot is not yet written or other record. | |
1182 | * | |
1183 | * This condition can occur if the buffer was reused | |
1184 | * from a combined basic- and diagnostic-sampling. | |
1185 | * If only basic-sampling is then active, entries are | |
1186 | * written into the larger diagnostic entries. | |
1187 | * This is typically the case for sample-data-blocks | |
1188 | * that are not full. Stop processing if the first | |
1189 | * invalid format was detected. | |
1190 | */ | |
1191 | if (!te->f) | |
1192 | break; | |
1193 | } | |
8c069ff4 HB |
1194 | |
1195 | /* Reset sample slot and advance to next sample */ | |
7e75fc3f HB |
1196 | reset_sample_slot(sample, flags); |
1197 | sample += sample_size; | |
8c069ff4 HB |
1198 | } |
1199 | } | |
1200 | ||
1201 | /* hw_perf_event_update() - Process sampling buffer | |
1202 | * @event: The perf event | |
1203 | * @flush_all: Flag to also flush partially filled sample-data-blocks | |
1204 | * | |
1205 | * Processes the sampling buffer and create perf event samples. | |
1206 | * The sampling buffer position are retrieved and saved in the TEAR_REG | |
1207 | * register of the specified perf event. | |
1208 | * | |
1209 | * Only full sample-data-blocks are processed. Specify the flash_all flag | |
d7528862 HB |
1210 | * to also walk through partially filled sample-data-blocks. It is ignored |
1211 | * if PERF_CPUM_SF_FULL_BLOCKS is set. The PERF_CPUM_SF_FULL_BLOCKS flag | |
1212 | * enforces the processing of full sample-data-blocks only (trailer entries | |
1213 | * with the block-full-indicator bit set). | |
8c069ff4 HB |
1214 | */ |
1215 | static void hw_perf_event_update(struct perf_event *event, int flush_all) | |
1216 | { | |
1217 | struct hw_perf_event *hwc = &event->hw; | |
1218 | struct hws_trailer_entry *te; | |
1219 | unsigned long *sdbt; | |
fcc77f50 | 1220 | unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; |
8c069ff4 HB |
1221 | int done; |
1222 | ||
d7528862 HB |
1223 | if (flush_all && SDB_FULL_BLOCKS(hwc)) |
1224 | flush_all = 0; | |
1225 | ||
8c069ff4 | 1226 | sdbt = (unsigned long *) TEAR_REG(hwc); |
69f239ed | 1227 | done = event_overflow = sampl_overflow = num_sdb = 0; |
8c069ff4 HB |
1228 | while (!done) { |
1229 | /* Get the trailer entry of the sample-data-block */ | |
1230 | te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); | |
1231 | ||
1232 | /* Leave loop if no more work to do (block full indicator) */ | |
1233 | if (!te->f) { | |
1234 | done = 1; | |
1235 | if (!flush_all) | |
1236 | break; | |
1237 | } | |
1238 | ||
69f239ed HB |
1239 | /* Check the sample overflow count */ |
1240 | if (te->overflow) | |
1241 | /* Account sample overflows and, if a particular limit | |
1242 | * is reached, extend the sampling buffer. | |
1243 | * For details, see sfb_account_overflows(). | |
8c069ff4 | 1244 | */ |
69f239ed | 1245 | sampl_overflow += te->overflow; |
8c069ff4 HB |
1246 | |
1247 | /* Timestamps are valid for full sample-data-blocks only */ | |
1248 | debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " | |
1249 | "overflow=%llu timestamp=0x%llx\n", | |
1250 | sdbt, te->overflow, | |
443d4beb | 1251 | (te->f) ? trailer_timestamp(te) : 0ULL); |
8c069ff4 HB |
1252 | |
1253 | /* Collect all samples from a single sample-data-block and | |
1254 | * flag if an (perf) event overflow happened. If so, the PMU | |
1255 | * is stopped and remaining samples will be discarded. | |
1256 | */ | |
1257 | hw_collect_samples(event, sdbt, &event_overflow); | |
69f239ed | 1258 | num_sdb++; |
8c069ff4 | 1259 | |
fcc77f50 HB |
1260 | /* Reset trailer (using compare-double-and-swap) */ |
1261 | do { | |
1262 | te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; | |
1263 | te_flags |= SDB_TE_ALERT_REQ_MASK; | |
1264 | } while (!cmpxchg_double(&te->flags, &te->overflow, | |
1265 | te->flags, te->overflow, | |
1266 | te_flags, 0ULL)); | |
8c069ff4 HB |
1267 | |
1268 | /* Advance to next sample-data-block */ | |
1269 | sdbt++; | |
1270 | if (is_link_entry(sdbt)) | |
1271 | sdbt = get_next_sdbt(sdbt); | |
1272 | ||
1273 | /* Update event hardware registers */ | |
1274 | TEAR_REG(hwc) = (unsigned long) sdbt; | |
1275 | ||
1276 | /* Stop processing sample-data if all samples of the current | |
1277 | * sample-data-block were flushed even if it was not full. | |
1278 | */ | |
1279 | if (flush_all && done) | |
1280 | break; | |
1281 | ||
1282 | /* If an event overflow happened, discard samples by | |
1283 | * processing any remaining sample-data-blocks. | |
1284 | */ | |
1285 | if (event_overflow) | |
1286 | flush_all = 1; | |
1287 | } | |
1288 | ||
69f239ed HB |
1289 | /* Account sample overflows in the event hardware structure */ |
1290 | if (sampl_overflow) | |
1291 | OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + | |
1292 | sampl_overflow, 1 + num_sdb); | |
8c069ff4 HB |
1293 | if (sampl_overflow || event_overflow) |
1294 | debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " | |
1295 | "overflow stats: sample=%llu event=%llu\n", | |
1296 | sampl_overflow, event_overflow); | |
1297 | } | |
1298 | ||
/* cpumsf_pmu_read() - PMU read callback
 * @event:	The perf event (not referenced)
 *
 * Intentionally empty: updates are interrupt-driven.
 */
static void cpumsf_pmu_read(struct perf_event *event)
{
}
1303 | ||
1304 | /* Activate sampling control. | |
1305 | * Next call of pmu_enable() starts sampling. | |
1306 | */ | |
1307 | static void cpumsf_pmu_start(struct perf_event *event, int flags) | |
1308 | { | |
eb7e7d76 | 1309 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
8c069ff4 HB |
1310 | |
1311 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) | |
1312 | return; | |
1313 | ||
1314 | if (flags & PERF_EF_RELOAD) | |
1315 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | |
1316 | ||
1317 | perf_pmu_disable(event->pmu); | |
1318 | event->hw.state = 0; | |
1319 | cpuhw->lsctl.cs = 1; | |
7e75fc3f HB |
1320 | if (SAMPL_DIAG_MODE(&event->hw)) |
1321 | cpuhw->lsctl.cd = 1; | |
8c069ff4 HB |
1322 | perf_pmu_enable(event->pmu); |
1323 | } | |
1324 | ||
1325 | /* Deactivate sampling control. | |
1326 | * Next call of pmu_enable() stops sampling. | |
1327 | */ | |
1328 | static void cpumsf_pmu_stop(struct perf_event *event, int flags) | |
1329 | { | |
eb7e7d76 | 1330 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
8c069ff4 HB |
1331 | |
1332 | if (event->hw.state & PERF_HES_STOPPED) | |
1333 | return; | |
1334 | ||
1335 | perf_pmu_disable(event->pmu); | |
1336 | cpuhw->lsctl.cs = 0; | |
7e75fc3f | 1337 | cpuhw->lsctl.cd = 0; |
8c069ff4 HB |
1338 | event->hw.state |= PERF_HES_STOPPED; |
1339 | ||
1340 | if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { | |
1341 | hw_perf_event_update(event, 1); | |
1342 | event->hw.state |= PERF_HES_UPTODATE; | |
1343 | } | |
1344 | perf_pmu_enable(event->pmu); | |
1345 | } | |
1346 | ||
1347 | static int cpumsf_pmu_add(struct perf_event *event, int flags) | |
1348 | { | |
eb7e7d76 | 1349 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
8c069ff4 HB |
1350 | int err; |
1351 | ||
1352 | if (cpuhw->flags & PMU_F_IN_USE) | |
1353 | return -EAGAIN; | |
1354 | ||
1355 | if (!cpuhw->sfb.sdbt) | |
1356 | return -EINVAL; | |
1357 | ||
1358 | err = 0; | |
1359 | perf_pmu_disable(event->pmu); | |
1360 | ||
1361 | event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | |
1362 | ||
1363 | /* Set up sampling controls. Always program the sampling register | |
1364 | * using the SDB-table start. Reset TEAR_REG event hardware register | |
1365 | * that is used by hw_perf_event_update() to store the sampling buffer | |
1366 | * position after samples have been flushed. | |
1367 | */ | |
1368 | cpuhw->lsctl.s = 0; | |
1369 | cpuhw->lsctl.h = 1; | |
69f239ed | 1370 | cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; |
8c069ff4 HB |
1371 | cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; |
1372 | cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); | |
1373 | hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); | |
1374 | ||
1375 | /* Ensure sampling functions are in the disabled state. If disabled, | |
1376 | * switch on sampling enable control. */ | |
7e75fc3f | 1377 | if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) { |
8c069ff4 HB |
1378 | err = -EAGAIN; |
1379 | goto out; | |
1380 | } | |
1381 | cpuhw->lsctl.es = 1; | |
7e75fc3f HB |
1382 | if (SAMPL_DIAG_MODE(&event->hw)) |
1383 | cpuhw->lsctl.ed = 1; | |
8c069ff4 HB |
1384 | |
1385 | /* Set in_use flag and store event */ | |
1386 | event->hw.idx = 0; /* only one sampling event per CPU supported */ | |
1387 | cpuhw->event = event; | |
1388 | cpuhw->flags |= PMU_F_IN_USE; | |
1389 | ||
1390 | if (flags & PERF_EF_START) | |
1391 | cpumsf_pmu_start(event, PERF_EF_RELOAD); | |
1392 | out: | |
1393 | perf_event_update_userpage(event); | |
1394 | perf_pmu_enable(event->pmu); | |
1395 | return err; | |
1396 | } | |
1397 | ||
1398 | static void cpumsf_pmu_del(struct perf_event *event, int flags) | |
1399 | { | |
eb7e7d76 | 1400 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
8c069ff4 HB |
1401 | |
1402 | perf_pmu_disable(event->pmu); | |
1403 | cpumsf_pmu_stop(event, PERF_EF_UPDATE); | |
1404 | ||
1405 | cpuhw->lsctl.es = 0; | |
7e75fc3f | 1406 | cpuhw->lsctl.ed = 0; |
8c069ff4 HB |
1407 | cpuhw->flags &= ~PMU_F_IN_USE; |
1408 | cpuhw->event = NULL; | |
1409 | ||
1410 | perf_event_update_userpage(event); | |
1411 | perf_pmu_enable(event->pmu); | |
1412 | } | |
1413 | ||
8c069ff4 | 1414 | CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); |
7e75fc3f | 1415 | CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); |
8c069ff4 HB |
1416 | |
1417 | static struct attribute *cpumsf_pmu_events_attr[] = { | |
1418 | CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), | |
7e75fc3f | 1419 | CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG), |
8c069ff4 HB |
1420 | NULL, |
1421 | }; | |
1422 | ||
1423 | PMU_FORMAT_ATTR(event, "config:0-63"); | |
1424 | ||
1425 | static struct attribute *cpumsf_pmu_format_attr[] = { | |
1426 | &format_attr_event.attr, | |
1427 | NULL, | |
1428 | }; | |
1429 | ||
1430 | static struct attribute_group cpumsf_pmu_events_group = { | |
1431 | .name = "events", | |
1432 | .attrs = cpumsf_pmu_events_attr, | |
1433 | }; | |
1434 | static struct attribute_group cpumsf_pmu_format_group = { | |
1435 | .name = "format", | |
1436 | .attrs = cpumsf_pmu_format_attr, | |
1437 | }; | |
1438 | static const struct attribute_group *cpumsf_pmu_attr_groups[] = { | |
1439 | &cpumsf_pmu_events_group, | |
1440 | &cpumsf_pmu_format_group, | |
1441 | NULL, | |
1442 | }; | |
1443 | ||
1444 | static struct pmu cpumf_sampling = { | |
1445 | .pmu_enable = cpumsf_pmu_enable, | |
1446 | .pmu_disable = cpumsf_pmu_disable, | |
1447 | ||
1448 | .event_init = cpumsf_pmu_event_init, | |
1449 | .add = cpumsf_pmu_add, | |
1450 | .del = cpumsf_pmu_del, | |
1451 | ||
1452 | .start = cpumsf_pmu_start, | |
1453 | .stop = cpumsf_pmu_stop, | |
1454 | .read = cpumsf_pmu_read, | |
1455 | ||
8c069ff4 HB |
1456 | .attr_groups = cpumsf_pmu_attr_groups, |
1457 | }; | |
1458 | ||
1459 | static void cpumf_measurement_alert(struct ext_code ext_code, | |
1460 | unsigned int alert, unsigned long unused) | |
1461 | { | |
1462 | struct cpu_hw_sf *cpuhw; | |
1463 | ||
1464 | if (!(alert & CPU_MF_INT_SF_MASK)) | |
1465 | return; | |
1466 | inc_irq_stat(IRQEXT_CMS); | |
eb7e7d76 | 1467 | cpuhw = this_cpu_ptr(&cpu_hw_sf); |
8c069ff4 HB |
1468 | |
1469 | /* Measurement alerts are shared and might happen when the PMU | |
1470 | * is not reserved. Ignore these alerts in this case. */ | |
1471 | if (!(cpuhw->flags & PMU_F_RESERVED)) | |
1472 | return; | |
1473 | ||
1474 | /* The processing below must take care of multiple alert events that | |
1475 | * might be indicated concurrently. */ | |
1476 | ||
1477 | /* Program alert request */ | |
1478 | if (alert & CPU_MF_INT_SF_PRA) { | |
1479 | if (cpuhw->flags & PMU_F_IN_USE) | |
1480 | hw_perf_event_update(cpuhw->event, 0); | |
1481 | else | |
1482 | WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE)); | |
1483 | } | |
1484 | ||
1485 | /* Report measurement alerts only for non-PRA codes */ | |
1486 | if (alert != CPU_MF_INT_SF_PRA) | |
1487 | debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert); | |
1488 | ||
1489 | /* Sampling authorization change request */ | |
1490 | if (alert & CPU_MF_INT_SF_SACA) | |
1491 | qsi(&cpuhw->qsi); | |
1492 | ||
1493 | /* Loss of sample data due to high-priority machine activities */ | |
1494 | if (alert & CPU_MF_INT_SF_LSDA) { | |
1495 | pr_err("Sample data was lost\n"); | |
1496 | cpuhw->flags |= PMU_F_ERR_LSDA; | |
1497 | sf_disable(); | |
1498 | } | |
1499 | ||
1500 | /* Invalid sampling buffer entry */ | |
1501 | if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) { | |
1502 | pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n", | |
1503 | alert); | |
1504 | cpuhw->flags |= PMU_F_ERR_IBE; | |
1505 | sf_disable(); | |
1506 | } | |
1507 | } | |
1508 | ||
28aa39b8 PG |
1509 | static int cpumf_pmu_notifier(struct notifier_block *self, |
1510 | unsigned long action, void *hcpu) | |
8c069ff4 HB |
1511 | { |
1512 | unsigned int cpu = (long) hcpu; | |
1513 | int flags; | |
1514 | ||
1515 | /* Ignore the notification if no events are scheduled on the PMU. | |
1516 | * This might be racy... | |
1517 | */ | |
1518 | if (!atomic_read(&num_events)) | |
1519 | return NOTIFY_OK; | |
1520 | ||
1521 | switch (action & ~CPU_TASKS_FROZEN) { | |
1522 | case CPU_ONLINE: | |
1523 | case CPU_ONLINE_FROZEN: | |
1524 | flags = PMC_INIT; | |
1525 | smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); | |
1526 | break; | |
1527 | case CPU_DOWN_PREPARE: | |
1528 | flags = PMC_RELEASE; | |
1529 | smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); | |
1530 | break; | |
1531 | default: | |
1532 | break; | |
1533 | } | |
1534 | ||
1535 | return NOTIFY_OK; | |
1536 | } | |
1537 | ||
69f239ed HB |
1538 | static int param_get_sfb_size(char *buffer, const struct kernel_param *kp) |
1539 | { | |
1540 | if (!cpum_sf_avail()) | |
1541 | return -ENODEV; | |
1542 | return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); | |
1543 | } | |
1544 | ||
1545 | static int param_set_sfb_size(const char *val, const struct kernel_param *kp) | |
1546 | { | |
1547 | int rc; | |
1548 | unsigned long min, max; | |
1549 | ||
1550 | if (!cpum_sf_avail()) | |
1551 | return -ENODEV; | |
1552 | if (!val || !strlen(val)) | |
1553 | return -EINVAL; | |
1554 | ||
1555 | /* Valid parameter values: "min,max" or "max" */ | |
1556 | min = CPUM_SF_MIN_SDB; | |
1557 | max = CPUM_SF_MAX_SDB; | |
1558 | if (strchr(val, ',')) | |
1559 | rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL; | |
1560 | else | |
1561 | rc = kstrtoul(val, 10, &max); | |
1562 | ||
1563 | if (min < 2 || min >= max || max > get_num_physpages()) | |
1564 | rc = -EINVAL; | |
1565 | if (rc) | |
1566 | return rc; | |
1567 | ||
1568 | sfb_set_limits(min, max); | |
7e75fc3f HB |
1569 | pr_info("The sampling buffer limits have changed to: " |
1570 | "min=%lu max=%lu (diag=x%lu)\n", | |
1571 | CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); | |
69f239ed HB |
1572 | return 0; |
1573 | } | |
1574 | ||
1575 | #define param_check_sfb_size(name, p) __param_check(name, p, void) | |
1576 | static struct kernel_param_ops param_ops_sfb_size = { | |
1577 | .set = param_set_sfb_size, | |
1578 | .get = param_get_sfb_size, | |
1579 | }; | |
1580 | ||
7e75fc3f HB |
1581 | #define RS_INIT_FAILURE_QSI 0x0001 |
1582 | #define RS_INIT_FAILURE_BSDES 0x0002 | |
1583 | #define RS_INIT_FAILURE_ALRT 0x0003 | |
1584 | #define RS_INIT_FAILURE_PERF 0x0004 | |
1585 | static void __init pr_cpumsf_err(unsigned int reason) | |
1586 | { | |
1587 | pr_err("Sampling facility support for perf is not available: " | |
1588 | "reason=%04x\n", reason); | |
1589 | } | |
1590 | ||
8c069ff4 HB |
1591 | static int __init init_cpum_sampling_pmu(void) |
1592 | { | |
7e75fc3f | 1593 | struct hws_qsi_info_block si; |
8c069ff4 HB |
1594 | int err; |
1595 | ||
1596 | if (!cpum_sf_avail()) | |
1597 | return -ENODEV; | |
1598 | ||
7e75fc3f HB |
1599 | memset(&si, 0, sizeof(si)); |
1600 | if (qsi(&si)) { | |
1601 | pr_cpumsf_err(RS_INIT_FAILURE_QSI); | |
1602 | return -ENODEV; | |
1603 | } | |
1604 | ||
1605 | if (si.bsdes != sizeof(struct hws_basic_entry)) { | |
1606 | pr_cpumsf_err(RS_INIT_FAILURE_BSDES); | |
1607 | return -EINVAL; | |
1608 | } | |
1609 | ||
1610 | if (si.ad) | |
1611 | sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); | |
1612 | ||
8c069ff4 HB |
1613 | sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); |
1614 | if (!sfdbg) | |
1615 | pr_err("Registering for s390dbf failed\n"); | |
1616 | debug_register_view(sfdbg, &debug_sprintf_view); | |
1617 | ||
1dad093b TH |
1618 | err = register_external_irq(EXT_IRQ_MEASURE_ALERT, |
1619 | cpumf_measurement_alert); | |
8c069ff4 | 1620 | if (err) { |
7e75fc3f | 1621 | pr_cpumsf_err(RS_INIT_FAILURE_ALRT); |
8c069ff4 HB |
1622 | goto out; |
1623 | } | |
1624 | ||
1625 | err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW); | |
1626 | if (err) { | |
7e75fc3f | 1627 | pr_cpumsf_err(RS_INIT_FAILURE_PERF); |
1dad093b TH |
1628 | unregister_external_irq(EXT_IRQ_MEASURE_ALERT, |
1629 | cpumf_measurement_alert); | |
8c069ff4 HB |
1630 | goto out; |
1631 | } | |
1632 | perf_cpu_notifier(cpumf_pmu_notifier); | |
1633 | out: | |
1634 | return err; | |
1635 | } | |
1636 | arch_initcall(init_cpum_sampling_pmu); | |
69f239ed | 1637 | core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); |