/*
 * Performance event support for the System z CPU-measurement Sampling Facility
 *
 * Copyright IBM Corp. 2013
 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 */
#define KMSG_COMPONENT	"cpum_sf"
#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>
#include <asm/cpu_mf.h>
#include <asm/irq.h>
#include <asm/debug.h>
#include <asm/timex.h>

/* Minimum number of sample-data-block-tables:
 * At least one table is required for the sampling buffer structure.
 * A single table contains up to 511 pointers to sample-data-blocks.
 */
#define CPUM_SF_MIN_SDBT	1

/* Number of sample-data-blocks per sample-data-block-table (SDBT):
 * A table contains SDB origin pointers (8 bytes each) and one table-link
 * entry that points to the origin of the next SDBT.
 */
#define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)

/* Maximum page offset for an SDBT table-link entry:
 * If this page offset is reached, a table-link entry to the next SDBT
 * must be added.
 */
#define CPUM_SF_SDBT_TL_OFFSET	(CPUM_SF_SDB_PER_TABLE * 8)
static inline int require_table_link(const void *sdbt)
{
	return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
}
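
/* Concrete geometry, assuming the 4KB page size used throughout this file:
 * CPUM_SF_SDB_PER_TABLE = (4096 - 8) / 8 = 511 SDB pointers per SDBT, and
 * CPUM_SF_SDBT_TL_OFFSET = 511 * 8 = 4088, i.e. the last eight-byte slot
 * of an SDBT page is reserved for the table-link entry to the next SDBT.
 */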

/* Minimum and maximum sampling buffer sizes:
 *
 * These values represent the minimum and maximum size of the sampling
 * buffer, taking the number of sample-data-block-tables into account.
 *
 *	Sampling buffer size		Buffer characteristics
 *	---------------------------------------------------
 *	64KB		==	  16 pages (4KB per page)
 *				   1 page  for SDB-tables
 *				  15 pages for SDBs
 *
 *	32MB		==	8192 pages (4KB per page)
 *				  16 pages for SDB-tables
 *				8176 pages for SDBs
 */
static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;

struct sf_buffer {
	unsigned long	 *sdbt;	    /* Sample-data-block-table origin */
	/* buffer characteristics (required for buffer increments) */
	unsigned long  num_sdb;	    /* Number of sample-data-blocks */
	unsigned long num_sdbt;	    /* Number of sample-data-block-tables */
	unsigned long	 *tail;	    /* last sample-data-block-table */
};

struct cpu_hw_sf {
	/* CPU-measurement sampling information block */
	struct hws_qsi_info_block qsi;
	/* CPU-measurement sampling control block */
	struct hws_lsctl_request_block lsctl;
	struct sf_buffer sfb;	    /* Sampling buffer */
	unsigned int flags;	    /* Status flags */
	struct perf_event *event;   /* Scheduled perf event */
};
static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);

/* Debug feature */
static debug_info_t *sfdbg;

/*
 * sf_disable() - Switch off sampling facility
 */
static int sf_disable(void)
{
	struct hws_lsctl_request_block sreq;

	memset(&sreq, 0, sizeof(sreq));
	return lsctl(&sreq);
}
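
/* Note: sf_disable() works by loading an all-zero control block, which
 * clears the enable (es) and activation (cs) controls, resets any sampling
 * states, and clears pending measurement alerts (see the remarks in
 * extend_sampling_buffer() below).
 */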

/*
 * sf_buffer_available() - Check for an allocated sampling buffer
 */
static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
{
	return !!cpuhw->sfb.sdbt;
}

/*
 * deallocate sampling facility buffer
 */
static void free_sampling_buffer(struct sf_buffer *sfb)
{
	unsigned long *sdbt, *curr;

	if (!sfb->sdbt)
		return;

	sdbt = sfb->sdbt;
	curr = sdbt;

	/* Free the SDBT after all SDBs are processed... */
	while (1) {
		if (!*curr || !sdbt)
			break;

		/* Process table-link entries */
		if (is_link_entry(curr)) {
			curr = get_next_sdbt(curr);
			if (sdbt)
				free_page((unsigned long) sdbt);

			/* If the origin is reached, sampling buffer is freed */
			if (curr == sfb->sdbt)
				break;
			else
				sdbt = curr;
		} else {
			/* Process SDB pointer */
			if (*curr) {
				free_page(*curr);
				curr++;
			}
		}
	}

	debug_sprintf_event(sfdbg, 5,
			    "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
	memset(sfb, 0, sizeof(*sfb));
}

static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
{
	unsigned long sdb, *trailer;

	/* Allocate and initialize sample-data-block */
	sdb = get_zeroed_page(gfp_flags);
	if (!sdb)
		return -ENOMEM;
	trailer = trailer_entry_ptr(sdb);
	*trailer = SDB_TE_ALERT_REQ_MASK;

	/* Link SDB into the sample-data-block-table */
	*sdbt = sdb;

	return 0;
}
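
/* Note: the SDB_TE_ALERT_REQ_MASK set in the trailer above requests a
 * measurement alert once the hardware has filled this sample-data-block,
 * so that cpumf_measurement_alert() can harvest the completed blocks.
 */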

/*
 * realloc_sampling_buffer() - extend sampler memory
 *
 * Allocates new sample-data-blocks and adds them to the specified sampling
 * buffer memory.
 *
 * Important: This modifies the sampling buffer and must be called when the
 *	      sampling facility is disabled.
 *
 * Returns zero on success, non-zero otherwise.
 */
static int realloc_sampling_buffer(struct sf_buffer *sfb,
				   unsigned long num_sdb, gfp_t gfp_flags)
{
	int i, rc;
	unsigned long *new, *tail;

	if (!sfb->sdbt || !sfb->tail)
		return -EINVAL;

	if (!is_link_entry(sfb->tail))
		return -EINVAL;

	/* Append to the existing sampling buffer, overwriting the table-link
	 * entry.  The tail variable always points to the "tail" (last and
	 * table-link) entry in an SDB-table.
	 */
	tail = sfb->tail;

	/* Do a sanity check whether the table-link entry points to
	 * the sampling buffer origin.
	 */
	if (sfb->sdbt != get_next_sdbt(tail)) {
		debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
				    "sampling buffer is not linked: origin=%p "
				    "tail=%p\n",
				    (void *) sfb->sdbt, (void *) tail);
		return -EINVAL;
	}

	/* Allocate remaining SDBs */
	rc = 0;
	for (i = 0; i < num_sdb; i++) {
		/* Allocate a new SDB-table if it is full. */
		if (require_table_link(tail)) {
			new = (unsigned long *) get_zeroed_page(gfp_flags);
			if (!new) {
				rc = -ENOMEM;
				break;
			}
			sfb->num_sdbt++;
			/* Link current page to tail of chain */
			*tail = (unsigned long)(void *) new + 1;
			tail = new;
		}

		/* Allocate a new sample-data-block.
		 * If there is not enough memory, stop the realloc process
		 * and simply use what was allocated.  If this is a temporary
		 * issue, a new realloc call (if required) might succeed.
		 */
		rc = alloc_sample_data_block(tail, gfp_flags);
		if (rc)
			break;
		sfb->num_sdb++;
		tail++;
	}

	/* Link sampling buffer to its origin */
	*tail = (unsigned long) sfb->sdbt + 1;
	sfb->tail = tail;

	debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
			    " settings: sdbt=%lu sdb=%lu\n",
			    sfb->num_sdbt, sfb->num_sdb);
	return rc;
}
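
/* Note: the "+ 1" used when storing an SDBT origin above sets the low-order
 * flag bit that is_link_entry() tests for and that get_next_sdbt() masks
 * off again; entries with this bit set are table links rather than SDB
 * pointers.
 */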
246
8c069ff4
HB
247/*
248 * allocate_sampling_buffer() - allocate sampler memory
249 *
250 * Allocates and initializes a sampling buffer structure using the
251 * specified number of sample-data-blocks (SDB). For each allocation,
252 * a 4K page is used. The number of sample-data-block-tables (SDBT)
253 * are calculated from SDBs.
254 * Also set the ALERT_REQ mask in each SDBs trailer.
255 *
256 * Returns zero on success, non-zero otherwise.
257 */
258static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
259{
69f239ed 260 int rc;
8c069ff4
HB
261
262 if (sfb->sdbt)
263 return -EINVAL;
69f239ed
HB
264
265 /* Allocate the sample-data-block-table origin */
266 sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
267 if (!sfb->sdbt)
268 return -ENOMEM;
8c069ff4 269 sfb->num_sdb = 0;
69f239ed 270 sfb->num_sdbt = 1;
8c069ff4 271
69f239ed
HB
272 /* Link the table origin to point to itself to prepare for
273 * realloc_sampling_buffer() invocation.
274 */
275 sfb->tail = sfb->sdbt;
276 *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
8c069ff4 277
69f239ed
HB
278 /* Allocate requested number of sample-data-blocks */
279 rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
280 if (rc) {
281 free_sampling_buffer(sfb);
282 debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
283 "realloc_sampling_buffer failed with rc=%i\n", rc);
284 } else
285 debug_sprintf_event(sfdbg, 4,
286 "alloc_sampling_buffer: tear=%p dear=%p\n",
287 sfb->sdbt, (void *) *sfb->sdbt);
288 return rc;
289}
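
/* For example, alloc_sampling_buffer(&sfb, 15) produces the 64KB minimum
 * layout from the size table above: one SDBT page plus 15 SDB pages.
 */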

static void sfb_set_limits(unsigned long min, unsigned long max)
{
	CPUM_SF_MIN_SDB = min;
	CPUM_SF_MAX_SDB = max;
}

static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
					struct hw_perf_event *hwc)
{
	if (!sfb->sdbt)
		return SFB_ALLOC_REG(hwc);
	if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
		return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
	return 0;
}

static int sfb_has_pending_allocs(struct sf_buffer *sfb,
				  struct hw_perf_event *hwc)
{
	return sfb_pending_allocs(sfb, hwc) > 0;
}

static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
{
	/* Limit the number of SDBs to not exceed the maximum */
	num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc));
	if (num)
		SFB_ALLOC_REG(hwc) += num;
}

static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
{
	SFB_ALLOC_REG(hwc) = 0;
	sfb_account_allocs(num, hwc);
}

static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
{
	unsigned long n_sdb, freq;
	unsigned long factor;

	/* Calculate sampling buffers using 4K pages
	 *
	 * 1. Use frequency as input.  The sampling buffer is designed for
	 *    a complete second.  This can be adjusted through the "factor"
	 *    variable.
	 *    In any case, alloc_sampling_buffer() sets the Alert Request
	 *    Control indicator to trigger a measurement alert to harvest
	 *    sample-data-blocks (SDB).
	 *
	 * 2. Compute the number of sample-data-blocks and ensure a minimum
	 *    of CPUM_SF_MIN_SDB.  Also ensure the upper limit does not
	 *    exceed CPUM_SF_MAX_SDB.  See also the remarks for these
	 *    symbolic constants.
	 *
	 * 3. Compute the number of pages used for the sample-data-block-table
	 *    and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table
	 *    to manage up to 511 sample-data-blocks).
	 */
	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
	factor = 1;
	n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / cpuhw->qsi.bsdes));
	if (n_sdb < CPUM_SF_MIN_SDB)
		n_sdb = CPUM_SF_MIN_SDB;

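	/* For illustration: with 4KB pages and, say, a 32-byte basic
	 * sampling entry (qsi.bsdes), one SDB holds (4096 - 64) / 32 = 126
	 * samples, so a frequency of 20000 samples per second yields
	 * n_sdb = DIV_ROUND_UP(20000, 126) = 159 sample-data-blocks.
	 */
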
	/* If there is already a sampling buffer allocated, it is very likely
	 * that the sampling facility is enabled too.  If the event to be
	 * initialized requires a greater sampling buffer, the allocation must
	 * be postponed.  Changing the sampling buffer requires the sampling
	 * facility to be in the disabled state.  So, account the number of
	 * required SDBs and let cpumsf_pmu_enable() resize the buffer just
	 * before the event is started.
	 */
	sfb_init_allocs(n_sdb, hwc);
	if (sf_buffer_available(cpuhw))
		return 0;

	debug_sprintf_event(sfdbg, 3,
			    "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n",
			    SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw);

	return alloc_sampling_buffer(&cpuhw->sfb,
				     sfb_pending_allocs(&cpuhw->sfb, hwc));
}

static unsigned long min_percent(unsigned int percent, unsigned long base,
				 unsigned long min)
{
	return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
}

static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
{
	/* Use a percentage-based approach to extend the sampling facility
	 * buffer.  Accept up to 5% sample data loss.
	 * Vary the extents between 1% and 5% of the current number of
	 * sample-data-blocks.
	 */
	if (ratio <= 5)
		return 0;
	if (ratio <= 25)
		return min_percent(1, base, 1);
	if (ratio <= 50)
		return min_percent(1, base, 1);
	if (ratio <= 75)
		return min_percent(2, base, 2);
	if (ratio <= 100)
		return min_percent(3, base, 3);
	if (ratio <= 250)
		return min_percent(4, base, 4);

	return min_percent(5, base, 8);
}
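
/* Example: a loss ratio of 80 percent falls into the "<= 100" bucket and
 * yields min_percent(3, base, 3), i.e. the smaller of 3 SDBs and 3 percent
 * of the current number of sample-data-blocks (rounded up).
 */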

static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
				  struct hw_perf_event *hwc)
{
	unsigned long ratio, num;

	if (!OVERFLOW_REG(hwc))
		return;

	/* The sample_overflow contains the average number of sample data
	 * that have been lost because sample-data-blocks were full.
	 *
	 * Calculate the total number of sample data entries that have been
	 * discarded.  Then calculate the ratio of lost samples to total samples
	 * per second in percent.
	 */
	ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
			     sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));

	/* Compute number of sample-data-blocks */
	num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
	if (num)
		sfb_account_allocs(num, hwc);

	debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
			    " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
	OVERFLOW_REG(hwc) = 0;
}
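
/* For illustration: at 10000 samples per second with an 80-SDB buffer and
 * an average of 15 lost samples per SDB, the loss ratio is
 * DIV_ROUND_UP(100 * 15 * 80, 10000) = 12 percent, for which
 * compute_sfb_extent(12, 80) requests one additional SDB.
 */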

/* extend_sampling_buffer() - Extend sampling buffer
 * @sfb:	Sampling buffer structure (for local CPU)
 * @hwc:	Perf event hardware structure
 *
 * Use this function to extend the sampling buffer based on the overflow counter
 * and postponed allocation extents stored in the specified Perf event hardware.
 *
 * Important: This function disables the sampling facility in order to safely
 *	      change the sampling buffer structure.  Do not call this function
 *	      when the PMU is active.
 */
static void extend_sampling_buffer(struct sf_buffer *sfb,
				   struct hw_perf_event *hwc)
{
	unsigned long num, num_old;
	int rc;

	num = sfb_pending_allocs(sfb, hwc);
	if (!num)
		return;
	num_old = sfb->num_sdb;

	/* Disable the sampling facility to reset any states and also
	 * clear pending measurement alerts.
	 */
	sf_disable();

	/* Extend the sampling buffer.
	 * This memory allocation typically happens in an atomic context when
	 * called by perf.  Because this is a reallocation, it is fine if the
	 * new SDB-request cannot be satisfied immediately.
	 */
	rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
	if (rc)
		debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
				    "failed with rc=%i\n", rc);

	if (sfb_has_pending_allocs(sfb, hwc))
		debug_sprintf_event(sfdbg, 5, "sfb: extend: "
				    "req=%lu alloc=%lu remaining=%lu\n",
				    num, sfb->num_sdb - num_old,
				    sfb_pending_allocs(sfb, hwc));
}


/* Number of perf events counting hardware events */
static atomic_t num_events;
/* Used to avoid races in calling reserve/release_cpumf_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);

#define PMC_INIT      0
#define PMC_RELEASE   1
#define PMC_FAILURE   2
static void setup_pmc_cpu(void *flags)
{
	int err;
	struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf);

	err = 0;
	switch (*((int *) flags)) {
	case PMC_INIT:
		memset(cpusf, 0, sizeof(*cpusf));
		err = qsi(&cpusf->qsi);
		if (err)
			break;
		cpusf->flags |= PMU_F_RESERVED;
		err = sf_disable();
		if (err)
			pr_err("Switching off the sampling facility failed "
			       "with rc=%i\n", err);
		debug_sprintf_event(sfdbg, 5,
				    "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
		break;
	case PMC_RELEASE:
		cpusf->flags &= ~PMU_F_RESERVED;
		err = sf_disable();
		if (err) {
			pr_err("Switching off the sampling facility failed "
			       "with rc=%i\n", err);
		} else {
			if (cpusf->sfb.sdbt)
				free_sampling_buffer(&cpusf->sfb);
		}
		debug_sprintf_event(sfdbg, 5,
				    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
		break;
	}
	if (err)
		*((int *) flags) |= PMC_FAILURE;
}

static void release_pmc_hardware(void)
{
	int flags = PMC_RELEASE;

	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
	on_each_cpu(setup_pmc_cpu, &flags, 1);
	perf_release_sampling();
}

static int reserve_pmc_hardware(void)
{
	int flags = PMC_INIT;
	int err;

	err = perf_reserve_sampling();
	if (err)
		return err;
	on_each_cpu(setup_pmc_cpu, &flags, 1);
	if (flags & PMC_FAILURE) {
		release_pmc_hardware();
		return -ENODEV;
	}
	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);

	return 0;
}

static void hw_perf_event_destroy(struct perf_event *event)
{
	/* Release PMC if this is the last perf event */
	if (!atomic_add_unless(&num_events, -1, 1)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_dec_return(&num_events) == 0)
			release_pmc_hardware();
		mutex_unlock(&pmc_reserve_mutex);
	}
}

static void hw_init_period(struct hw_perf_event *hwc, u64 period)
{
	hwc->sample_period = period;
	hwc->last_period = hwc->sample_period;
	local64_set(&hwc->period_left, hwc->sample_period);
}

static void hw_reset_registers(struct hw_perf_event *hwc,
			       unsigned long *sdbt_origin)
{
	/* (Re)set to first sample-data-block-table */
	TEAR_REG(hwc) = (unsigned long) sdbt_origin;
}

static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
				   unsigned long rate)
{
	return clamp_t(unsigned long, rate,
		       si->min_sampl_rate, si->max_sampl_rate);
}

static int __hw_perf_event_init(struct perf_event *event)
{
	struct cpu_hw_sf *cpuhw;
	struct hws_qsi_info_block si;
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	unsigned long rate;
	int cpu, err;

	/* Reserve CPU-measurement sampling facility */
	err = 0;
	if (!atomic_inc_not_zero(&num_events)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
			err = -EBUSY;
		else
			atomic_inc(&num_events);
		mutex_unlock(&pmc_reserve_mutex);
	}
	event->destroy = hw_perf_event_destroy;

	if (err)
		goto out;

	/* Access per-CPU sampling information (query sampling info).
	 *
	 * The event->cpu value can be -1 to count on every CPU, for example,
	 * when attaching to a task.  If this is specified, use the query
	 * sampling info from the current CPU, otherwise use event->cpu to
	 * retrieve the per-CPU information.
	 * Later, cpuhw indicates whether to allocate sampling buffers for a
	 * particular CPU (cpuhw != NULL) or each online CPU (cpuhw == NULL).
	 */
	memset(&si, 0, sizeof(si));
	cpuhw = NULL;
	if (event->cpu == -1)
		qsi(&si);
	else {
		/* Event is pinned to a particular CPU, retrieve the per-CPU
		 * sampling structure for accessing the CPU-specific QSI.
		 */
		cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
		si = cpuhw->qsi;
	}

	/* Check sampling facility authorization and, if not authorized,
	 * fall back to other PMUs.  It is safe to check any CPU because
	 * the authorization is identical for all configured CPUs.
	 */
	if (!si.as) {
		err = -ENOENT;
		goto out;
	}

	/* The sampling information (si) contains information about the
	 * min/max sampling intervals and the CPU speed.  So calculate the
	 * correct sampling interval and avoid the whole period adjust
	 * feedback loop.
	 */
	rate = 0;
	if (attr->freq) {
		rate = freq_to_sample_rate(&si, attr->sample_freq);
		rate = hw_limit_rate(&si, rate);
		attr->freq = 0;
		attr->sample_period = rate;
	} else {
		/* The min/max sampling rates specify the valid range
		 * of sample periods.  If the specified sample period is
		 * out of range, limit the period to the range boundary.
		 */
		rate = hw_limit_rate(&si, hwc->sample_period);

		/* The perf core maintains a maximum sample rate that is
		 * configurable through the sysctl interface.  Ensure the
		 * sampling rate does not exceed this value.  This also helps
		 * to avoid throttling when pushing samples with
		 * perf_event_overflow().
		 */
		if (sample_rate_to_freq(&si, rate) >
		    sysctl_perf_event_sample_rate) {
			err = -EINVAL;
			debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
			goto out;
		}
	}
	SAMPL_RATE(hwc) = rate;
	hw_init_period(hwc, SAMPL_RATE(hwc));

	/* Initialize sample data overflow accounting */
	hwc->extra_reg.reg = REG_OVERFLOW;
	OVERFLOW_REG(hwc) = 0;

	/* Allocate the per-CPU sampling buffer using the CPU information
	 * from the event.  If the event is not pinned to a particular
	 * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
	 * buffers for each online CPU.
	 */
	if (cpuhw)
		/* Event is pinned to a particular CPU */
		err = allocate_sdbt(cpuhw, hwc);
	else {
		/* Event is not pinned, allocate sampling buffer on
		 * each online CPU
		 */
		for_each_online_cpu(cpu) {
			cpuhw = &per_cpu(cpu_hw_sf, cpu);
			err = allocate_sdbt(cpuhw, hwc);
			if (err)
				break;
		}
	}
out:
	return err;
}

static int cpumsf_pmu_event_init(struct perf_event *event)
{
	int err;

	/* No support for taken branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	switch (event->attr.type) {
	case PERF_TYPE_RAW:
		if (event->attr.config != PERF_EVENT_CPUM_SF)
			return -ENOENT;
		break;
	case PERF_TYPE_HARDWARE:
		/* Support sampling of CPU cycles in addition to the
		 * counter facility.  However, the counter facility
		 * is more precise and, hence, restrict this PMU to
		 * sampling events only.
		 */
		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
			return -ENOENT;
		if (!is_sampling_event(event))
			return -ENOENT;
		break;
	default:
		return -ENOENT;
	}

	if (event->cpu >= nr_cpumask_bits ||
	    (event->cpu >= 0 && !cpu_online(event->cpu)))
		return -ENODEV;

	err = __hw_perf_event_init(event);
	if (unlikely(err))
		if (event->destroy)
			event->destroy(event);
	return err;
}

static void cpumsf_pmu_enable(struct pmu *pmu)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
	struct hw_perf_event *hwc;
	int err;

	if (cpuhw->flags & PMU_F_ENABLED)
		return;

	if (cpuhw->flags & PMU_F_ERR_MASK)
		return;

	/* Check whether to extend the sampling buffer.
	 *
	 * Two conditions trigger an increase of the sampling buffer for a
	 * perf event:
	 *    1. Postponed buffer allocations from the event initialization.
	 *    2. Sampling overflows that contribute to pending allocations.
	 *
	 * Note that the extend_sampling_buffer() function disables the sampling
	 * facility, but it can be fully re-enabled using sampling controls that
	 * have been saved in cpumsf_pmu_disable().
	 */
	if (cpuhw->event) {
		hwc = &cpuhw->event->hw;
		/* Account number of overflow-designated buffer extents */
		sfb_account_overflows(cpuhw, hwc);
		if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
			extend_sampling_buffer(&cpuhw->sfb, hwc);
	}

	/* (Re)enable the PMU and sampling facility */
	cpuhw->flags |= PMU_F_ENABLED;
	barrier();

	err = lsctl(&cpuhw->lsctl);
	if (err) {
		cpuhw->flags &= ~PMU_F_ENABLED;
		pr_err("Loading sampling controls failed: op=%i err=%i\n",
		       1, err);
		return;
	}

	debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n",
			    cpuhw->lsctl.es, cpuhw->lsctl.cs,
			    (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
}

static void cpumsf_pmu_disable(struct pmu *pmu)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
	struct hws_lsctl_request_block inactive;
	struct hws_qsi_info_block si;
	int err;

	if (!(cpuhw->flags & PMU_F_ENABLED))
		return;

	if (cpuhw->flags & PMU_F_ERR_MASK)
		return;

	/* Switch off sampling activation control */
	inactive = cpuhw->lsctl;
	inactive.cs = 0;

	err = lsctl(&inactive);
	if (err) {
		pr_err("Loading sampling controls failed: op=%i err=%i\n",
		       2, err);
		return;
	}

	/* Save state of TEAR and DEAR register contents */
	if (!qsi(&si)) {
		/* TEAR/DEAR values are valid only if the sampling facility is
		 * enabled.  Note that cpumsf_pmu_disable() might be called even
		 * for a disabled sampling facility because cpumsf_pmu_enable()
		 * controls the enable/disable state.
		 */
		if (si.es) {
			cpuhw->lsctl.tear = si.tear;
			cpuhw->lsctl.dear = si.dear;
		}
	} else
		debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
				    "qsi() failed with err=%i\n", err);

	cpuhw->flags &= ~PMU_F_ENABLED;
}

/* perf_push_sample() - Push samples to perf
 * @event:	The perf event
 * @sample:	Hardware sample data
 *
 * Use the hardware sample data to create a perf event sample.  The sample
 * is then pushed to the event subsystem and the function checks for
 * possible event overflows.  If an event overflow occurs, the PMU is
 * stopped.
 *
 * Return non-zero if an event overflow occurred.
 */
static int perf_push_sample(struct perf_event *event,
			    struct hws_data_entry *sample)
{
	int overflow;
	struct pt_regs regs;
	struct perf_sample_data data;

	/* Skip samples that are invalid or for which the instruction address
	 * is not predictable.  For the latter, the wait-state bit is set.
	 */
	if (sample->I || sample->W)
		return 0;

	perf_sample_data_init(&data, 0, event->hw.last_period);

	memset(&regs, 0, sizeof(regs));
	regs.psw.addr = sample->ia;
	if (sample->T)
		regs.psw.mask |= PSW_MASK_DAT;
	if (sample->W)
		regs.psw.mask |= PSW_MASK_WAIT;
	if (sample->P)
		regs.psw.mask |= PSW_MASK_PSTATE;
	switch (sample->AS) {
	case 0x0:
		regs.psw.mask |= PSW_ASC_PRIMARY;
		break;
	case 0x1:
		regs.psw.mask |= PSW_ASC_ACCREG;
		break;
	case 0x2:
		regs.psw.mask |= PSW_ASC_SECONDARY;
		break;
	case 0x3:
		regs.psw.mask |= PSW_ASC_HOME;
		break;
	}

	overflow = 0;
	if (perf_event_overflow(event, &data, &regs)) {
		overflow = 1;
		event->pmu->stop(event, 0);
	}
	perf_event_update_userpage(event);

	return overflow;
}
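
/* Note: the pt_regs constructed above carry the sample's PSW state rather
 * than a live register set; perf derives the sample context (for example,
 * user versus kernel mode via the PSTATE bit) from this reconstructed PSW.
 */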

static void perf_event_count_update(struct perf_event *event, u64 count)
{
	local64_add(count, &event->count);
}

/* hw_collect_samples() - Walk through a sample-data-block and collect samples
 * @event:	The perf event
 * @sdbt:	Sample-data-block table
 * @overflow:	Event overflow counter
 *
 * Walks through a sample-data-block and collects hardware sample-data that is
 * pushed to the perf event subsystem.  The overflow counter reports the number
 * of samples that have been discarded due to an event overflow.
 */
static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
			       unsigned long long *overflow)
{
	struct hws_data_entry *sample;
	unsigned long *trailer;

	trailer = trailer_entry_ptr(*sdbt);
	sample = (struct hws_data_entry *) *sdbt;
	while ((unsigned long *) sample < trailer) {
		/* Check for an empty sample */
		if (!sample->def)
			break;

		/* Update perf event period */
		perf_event_count_update(event, SAMPL_RATE(&event->hw));

		/* Check for basic sampling mode */
		if (sample->def == 0x0001) {
			/* If an event overflow occurred, the PMU is stopped to
			 * throttle event delivery.  Remaining sample data is
			 * discarded.
			 */
			if (!*overflow)
				*overflow = perf_push_sample(event, sample);
			else
				/* Count discarded samples */
				*overflow += 1;
		} else
			/* Sample slot is not yet written or other record */
			debug_sprintf_event(sfdbg, 5, "hw_collect_samples: "
					    "Unknown sample data entry format:"
					    " %i\n", sample->def);

		/* Reset sample slot and advance to next sample */
		sample->def = 0;
		sample++;
	}
}

/* hw_perf_event_update() - Process sampling buffer
 * @event:	The perf event
 * @flush_all:	Flag to also flush partially filled sample-data-blocks
 *
 * Processes the sampling buffer and creates perf event samples.
 * The sampling buffer position is retrieved and saved in the TEAR_REG
 * register of the specified perf event.
 *
 * Only full sample-data-blocks are processed.  Specify the flush_all flag
 * to also walk through partially filled sample-data-blocks.
 */
static void hw_perf_event_update(struct perf_event *event, int flush_all)
{
	struct hw_perf_event *hwc = &event->hw;
	struct hws_trailer_entry *te;
	unsigned long *sdbt;
	unsigned long long event_overflow, sampl_overflow, num_sdb;
	int done;

	sdbt = (unsigned long *) TEAR_REG(hwc);
	done = event_overflow = sampl_overflow = num_sdb = 0;
	while (!done) {
		/* Get the trailer entry of the sample-data-block */
		te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);

		/* Leave loop if no more work to do (block full indicator) */
		if (!te->f) {
			done = 1;
			if (!flush_all)
				break;
		}

		/* Check the sample overflow count */
		if (te->overflow)
			/* Account sample overflows and, if a particular limit
			 * is reached, extend the sampling buffer.
			 * For details, see sfb_account_overflows().
			 */
			sampl_overflow += te->overflow;

		/* Timestamps are valid for full sample-data-blocks only */
		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
				    "overflow=%llu timestamp=0x%llx\n",
				    sdbt, te->overflow,
				    (te->f) ? te->timestamp : 0ULL);

		/* Collect all samples from a single sample-data-block and
		 * flag if a perf event overflow happened.  If so, the PMU
		 * is stopped and remaining samples will be discarded.
		 */
		hw_collect_samples(event, sdbt, &event_overflow);
		num_sdb++;

		/* Reset trailer */
		xchg(&te->overflow, 0);
		xchg((unsigned char *) te, 0x40);

		/* Advance to next sample-data-block */
		sdbt++;
		if (is_link_entry(sdbt))
			sdbt = get_next_sdbt(sdbt);

		/* Update event hardware registers */
		TEAR_REG(hwc) = (unsigned long) sdbt;

		/* Stop processing sample-data if all samples of the current
		 * sample-data-block were flushed even if it was not full.
		 */
		if (flush_all && done)
			break;

		/* If an event overflow happened, discard samples by
		 * processing any remaining sample-data-blocks.
		 */
		if (event_overflow)
			flush_all = 1;
	}

	/* Account sample overflows in the event hardware structure */
	if (sampl_overflow)
		OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
						 sampl_overflow, 1 + num_sdb);
	if (sampl_overflow || event_overflow)
		debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
				    "overflow stats: sample=%llu event=%llu\n",
				    sampl_overflow, event_overflow);
}
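
/* Note: the OVERFLOW_REG update above keeps a running average of lost
 * samples per processed SDB; for example, a previous value of 0 and 45
 * overflows across 8 blocks store DIV_ROUND_UP(0 + 45, 1 + 8) = 5.
 */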

static void cpumsf_pmu_read(struct perf_event *event)
{
	/* Nothing to do ... updates are interrupt-driven */
}

/* Activate sampling control.
 * Next call of pmu_enable() starts sampling.
 */
static void cpumsf_pmu_start(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	perf_pmu_disable(event->pmu);
	event->hw.state = 0;
	cpuhw->lsctl.cs = 1;
	perf_pmu_enable(event->pmu);
}

/* Deactivate sampling control.
 * Next call of pmu_enable() stops sampling.
 */
static void cpumsf_pmu_stop(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

	if (event->hw.state & PERF_HES_STOPPED)
		return;

	perf_pmu_disable(event->pmu);
	cpuhw->lsctl.cs = 0;
	event->hw.state |= PERF_HES_STOPPED;

	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
		hw_perf_event_update(event, 1);
		event->hw.state |= PERF_HES_UPTODATE;
	}
	perf_pmu_enable(event->pmu);
}

static int cpumsf_pmu_add(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
	int err;

	if (cpuhw->flags & PMU_F_IN_USE)
		return -EAGAIN;

	if (!cpuhw->sfb.sdbt)
		return -EINVAL;

	err = 0;
	perf_pmu_disable(event->pmu);

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	/* Set up sampling controls.  Always program the sampling register
	 * using the SDB-table start.  Reset TEAR_REG event hardware register
	 * that is used by hw_perf_event_update() to store the sampling buffer
	 * position after samples have been flushed.
	 */
	cpuhw->lsctl.s = 0;
	cpuhw->lsctl.h = 1;
	cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
	cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
	cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
	hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);

	/* Ensure sampling functions are in the disabled state.  If disabled,
	 * switch on sampling enable control. */
	if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) {
		err = -EAGAIN;
		goto out;
	}
	cpuhw->lsctl.es = 1;

	/* Set in_use flag and store event */
	event->hw.idx = 0;	  /* only one sampling event per CPU supported */
	cpuhw->event = event;
	cpuhw->flags |= PMU_F_IN_USE;

	if (flags & PERF_EF_START)
		cpumsf_pmu_start(event, PERF_EF_RELOAD);
out:
	perf_event_update_userpage(event);
	perf_pmu_enable(event->pmu);
	return err;
}

static void cpumsf_pmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

	perf_pmu_disable(event->pmu);
	cpumsf_pmu_stop(event, PERF_EF_UPDATE);

	cpuhw->lsctl.es = 0;
	cpuhw->flags &= ~PMU_F_IN_USE;
	cpuhw->event = NULL;

	perf_event_update_userpage(event);
	perf_pmu_enable(event->pmu);
}

static int cpumsf_pmu_event_idx(struct perf_event *event)
{
	return event->hw.idx;
}

CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);

static struct attribute *cpumsf_pmu_events_attr[] = {
	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
	NULL,
};

PMU_FORMAT_ATTR(event, "config:0-63");

static struct attribute *cpumsf_pmu_format_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group cpumsf_pmu_events_group = {
	.name = "events",
	.attrs = cpumsf_pmu_events_attr,
};
static struct attribute_group cpumsf_pmu_format_group = {
	.name = "format",
	.attrs = cpumsf_pmu_format_attr,
};
static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
	&cpumsf_pmu_events_group,
	&cpumsf_pmu_format_group,
	NULL,
};

static struct pmu cpumf_sampling = {
	.pmu_enable   = cpumsf_pmu_enable,
	.pmu_disable  = cpumsf_pmu_disable,

	.event_init   = cpumsf_pmu_event_init,
	.add	      = cpumsf_pmu_add,
	.del	      = cpumsf_pmu_del,

	.start	      = cpumsf_pmu_start,
	.stop	      = cpumsf_pmu_stop,
	.read	      = cpumsf_pmu_read,

	.event_idx    = cpumsf_pmu_event_idx,
	.attr_groups  = cpumsf_pmu_attr_groups,
};

static void cpumf_measurement_alert(struct ext_code ext_code,
				    unsigned int alert, unsigned long unused)
{
	struct cpu_hw_sf *cpuhw;

	if (!(alert & CPU_MF_INT_SF_MASK))
		return;
	inc_irq_stat(IRQEXT_CMS);
	cpuhw = &__get_cpu_var(cpu_hw_sf);

	/* Measurement alerts are shared and might happen when the PMU
	 * is not reserved.  Ignore these alerts in this case. */
	if (!(cpuhw->flags & PMU_F_RESERVED))
		return;

	/* The processing below must take care of multiple alert events that
	 * might be indicated concurrently. */

	/* Program alert request */
	if (alert & CPU_MF_INT_SF_PRA) {
		if (cpuhw->flags & PMU_F_IN_USE)
			hw_perf_event_update(cpuhw->event, 0);
		else
			WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
	}

	/* Report measurement alerts only for non-PRA codes */
	if (alert != CPU_MF_INT_SF_PRA)
		debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);

	/* Sampling authorization change request */
	if (alert & CPU_MF_INT_SF_SACA)
		qsi(&cpuhw->qsi);

	/* Loss of sample data due to high-priority machine activities */
	if (alert & CPU_MF_INT_SF_LSDA) {
		pr_err("Sample data was lost\n");
		cpuhw->flags |= PMU_F_ERR_LSDA;
		sf_disable();
	}

	/* Invalid sampling buffer entry */
	if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
		pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
		       alert);
		cpuhw->flags |= PMU_F_ERR_IBE;
		sf_disable();
	}
}

static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
					unsigned long action, void *hcpu)
{
	unsigned int cpu = (long) hcpu;
	int flags;

	/* Ignore the notification if no events are scheduled on the PMU.
	 * This might be racy...
	 */
	if (!atomic_read(&num_events))
		return NOTIFY_OK;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		flags = PMC_INIT;
		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
		break;
	case CPU_DOWN_PREPARE:
		flags = PMC_RELEASE;
		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
{
	if (!cpum_sf_avail())
		return -ENODEV;
	return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
}

static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
{
	int rc;
	unsigned long min, max;

	if (!cpum_sf_avail())
		return -ENODEV;
	if (!val || !strlen(val))
		return -EINVAL;

	/* Valid parameter values: "min,max" or "max" */
	min = CPUM_SF_MIN_SDB;
	max = CPUM_SF_MAX_SDB;
	if (strchr(val, ','))
		rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
	else
		rc = kstrtoul(val, 10, &max);

	if (min < 2 || min >= max || max > get_num_physpages())
		rc = -EINVAL;
	if (rc)
		return rc;

	sfb_set_limits(min, max);
	pr_info("Changed sampling buffer settings: min=%lu max=%lu\n",
		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
	return 0;
}

#define param_check_sfb_size(name, p) __param_check(name, p, void)
static struct kernel_param_ops param_ops_sfb_size = {
	.set = param_set_sfb_size,
	.get = param_get_sfb_size,
};
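
/* The sampling buffer limits can be tuned through the "cpum_sfb_size"
 * kernel parameter declared at the end of this file.  For example,
 * cpum_sfb_size=64,1024 on the kernel command line sets min=64 and
 * max=1024 SDBs; a single value such as cpum_sfb_size=1024 changes
 * only the maximum.
 */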

static int __init init_cpum_sampling_pmu(void)
{
	int err;

	if (!cpum_sf_avail())
		return -ENODEV;

	sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
	if (!sfdbg)
		pr_err("Registering for s390dbf failed\n");
	debug_register_view(sfdbg, &debug_sprintf_view);

	err = register_external_interrupt(0x1407, cpumf_measurement_alert);
	if (err) {
		pr_err("Failed to register for CPU-measurement alerts\n");
		goto out;
	}

	err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
	if (err) {
		pr_err("Failed to register cpum_sf pmu\n");
		unregister_external_interrupt(0x1407, cpumf_measurement_alert);
		goto out;
	}
	perf_cpu_notifier(cpumf_pmu_notifier);
out:
	return err;
}
arch_initcall(init_cpum_sampling_pmu);
core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);