/*
 * Performance event support for the System z CPU-measurement Sampling Facility
 *
 * Copyright IBM Corp. 2013
 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 */
#define KMSG_COMPONENT "cpum_sf"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <asm/cpu_mf.h>
#include <asm/irq.h>
#include <asm/debug.h>
#include <asm/timex.h>

/* Minimum number of sample-data-block-tables:
 * At least one table is required for the sampling buffer structure.
 * A single table contains up to 511 pointers to sample-data-blocks.
 */
#define CPUM_SF_MIN_SDBT 1

/* Minimum number of sample-data-blocks:
 * The minimum designates a single page for sample-data-blocks, i.e.,
 * up to 126 sample-data-blocks with a size of 32 bytes (bsdes).
 */
#define CPUM_SF_MIN_SDB 126

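/* Capacity arithmetic behind the two minimums above (an illustrative note
 * derived from the constants used later in this file, not authoritative
 * architecture documentation): an SDBT occupies one 4K page whose last
 * 8-byte entry is reserved as a link, so it holds (4096 - 8) / 8 = 511
 * sample-data-block pointers; a sample-data-block is also one 4K page that
 * ends with a 64-byte trailer entry, so it holds (4096 - 64) / 32 = 126
 * samples for the 32-byte basic entry size (bsdes), which is where the
 * value of CPUM_SF_MIN_SDB comes from.
 */
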
/* Maximum number of sample-data-blocks:
 * The maximum number designates approx. 256K per CPU including
 * the given number of sample-data-blocks and taking the number
 * of sample-data-block tables into account.
 *
 * Later, this number can be increased for extending the sampling
 * buffer, for example, by factor 2 (512K) or 4 (1M).
 */
#define CPUM_SF_MAX_SDB 6471

struct sf_buffer {
        unsigned long sdbt;         /* Sample-data-block-table origin */
        /* buffer characteristics (required for buffer increments) */
        unsigned long num_sdb;      /* Number of sample-data-blocks */
        unsigned long tail;         /* last sample-data-block-table */
};

struct cpu_hw_sf {
        /* CPU-measurement sampling information block */
        struct hws_qsi_info_block qsi;
        struct hws_lsctl_request_block lsctl;
        struct sf_buffer sfb;       /* Sampling buffer */
        unsigned int flags;         /* Status flags */
        struct perf_event *event;   /* Scheduled perf event */
};
static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);

/* Debug feature */
static debug_info_t *sfdbg;

/*
 * sf_buffer_available() - Check for an allocated sampling buffer
 */
static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
{
        return (cpuhw->sfb.sdbt) ? 1 : 0;
}

/*
 * deallocate sampling facility buffer
 */
static void free_sampling_buffer(struct sf_buffer *sfb)
{
        unsigned long sdbt, *curr;

        if (!sfb->sdbt)
                return;

        sdbt = sfb->sdbt;
        curr = (unsigned long *) sdbt;

        /* we'll free the SDBT after all SDBs are processed... */
        while (1) {
                if (!*curr || !sdbt)
                        break;

                /* watch for link entry reset if found */
                if (is_link_entry(curr)) {
                        curr = get_next_sdbt(curr);
                        if (sdbt)
                                free_page(sdbt);

                        /* we are done if we reach the origin */
                        if ((unsigned long) curr == sfb->sdbt)
                                break;
                        else
                                sdbt = (unsigned long) curr;
                } else {
                        /* process SDB pointer */
                        if (*curr) {
                                free_page(*curr);
                                curr++;
                        }
                }
        }

        debug_sprintf_event(sfdbg, 5,
                            "free_sampling_buffer: freed sdbt=%0lx\n", sfb->sdbt);
        memset(sfb, 0, sizeof(*sfb));
}

/*
 * alloc_sampling_buffer() - allocate sampler memory
 *
 * Allocates and initializes a sampling buffer structure using the
 * specified number of sample-data-blocks (SDB). For each allocation,
 * a 4K page is used. The number of sample-data-block-tables (SDBT)
 * is calculated from the number of SDBs.
 * Also set the ALERT_REQ mask in each SDB's trailer.
 *
 * Returns zero on success, non-zero otherwise.
 */
static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
{
        int j, k, rc;
        unsigned long *sdbt, *tail, *trailer;
        unsigned long sdb;
        unsigned long num_sdbt, sdb_per_table;

        if (sfb->sdbt)
                return -EINVAL;
        sfb->num_sdb = 0;

        /* Compute the number of required sample-data-block-tables (SDBT) */
        num_sdbt = num_sdb / ((PAGE_SIZE - 8) / 8);
        if (num_sdbt < CPUM_SF_MIN_SDBT)
                num_sdbt = CPUM_SF_MIN_SDBT;
        sdb_per_table = (PAGE_SIZE - 8) / 8;

        debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: num_sdbt=%lu "
                            "num_sdb=%lu sdb_per_table=%lu\n",
                            num_sdbt, num_sdb, sdb_per_table);
        sdbt = NULL;
        tail = sdbt;

        for (j = 0; j < num_sdbt; j++) {
                sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
                if (!sdbt) {
                        rc = -ENOMEM;
                        goto allocate_sdbt_error;
                }

                /* save origin of sample-data-block-table */
                if (!sfb->sdbt)
                        sfb->sdbt = (unsigned long) sdbt;

                /* link current page to tail of chain */
                if (tail)
                        *tail = (unsigned long)(void *) sdbt + 1;

                for (k = 0; k < num_sdb && k < sdb_per_table; k++) {
                        /* get and set SDB page */
                        sdb = get_zeroed_page(GFP_KERNEL);
                        if (!sdb) {
                                rc = -ENOMEM;
                                goto allocate_sdbt_error;
                        }
                        *sdbt = sdb;
                        trailer = trailer_entry_ptr(*sdbt);
                        *trailer = SDB_TE_ALERT_REQ_MASK;
                        sdbt++;
                }
                num_sdb -= k;
                sfb->num_sdb += k;      /* count allocated sdb's */
                tail = sdbt;
        }

        rc = 0;
        if (tail)
                *tail = sfb->sdbt + 1;
        sfb->tail = (unsigned long) (void *)tail;

allocate_sdbt_error:
        if (rc)
                free_sampling_buffer(sfb);
        else
                debug_sprintf_event(sfdbg, 4,
                        "alloc_sampling_buffer: tear=%0lx dear=%0lx\n",
                        sfb->sdbt, *(unsigned long *) sfb->sdbt);
        return rc;
}
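
/* Orientation note on the structure built above (explanatory, not part of
 * the original comments): each filled SDBT entry points to a 4K
 * sample-data-block, the entry written after the last SDB pointer of a
 * table is a link entry (low bit set, see "*tail = ... + 1") that points to
 * the next SDBT page, and the final link entry points back to the table
 * origin so the sampler can wrap around. free_sampling_buffer() walks
 * exactly this structure via is_link_entry() and get_next_sdbt().
 */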

static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hwc)
{
        unsigned long n_sdb, freq;
        unsigned long factor;

        /* Calculate sampling buffers using 4K pages
         *
         * 1. Use frequency as input. The sampling buffer is designed for
         *    a complete second. This can be adjusted through the "factor"
         *    variable.
         *    In any case, alloc_sampling_buffer() sets the Alert Request
         *    Control indicator to trigger measurement-alert to harvest
         *    sample-data-blocks (sdb).
         *
         * 2. Compute the number of sample-data-blocks and ensure a minimum
         *    of CPUM_SF_MIN_SDB. Also ensure the upper limit does not
         *    exceed CPUM_SF_MAX_SDB. See also the remarks for these
         *    symbolic constants.
         *
         * 3. Compute number of pages used for the sample-data-block-table
         *    and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table
         *    to manage up to 511 sample-data-blocks).
         */
        freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
        factor = 1;
        n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / cpuhw->qsi.bsdes));
        if (n_sdb < CPUM_SF_MIN_SDB)
                n_sdb = CPUM_SF_MIN_SDB;

        /* Return if there is already a sampling buffer allocated.
         * XXX Remove this later and check number of available and
         * required sdb's and, if necessary, increase the sampling buffer.
         */
        if (sf_buffer_available(cpuhw))
                return 0;

        debug_sprintf_event(sfdbg, 3,
                            "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%i cpuhw=%p\n",
                            SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw);

        return alloc_sampling_buffer(&cpuhw->sfb,
                                     min_t(unsigned long, n_sdb, CPUM_SF_MAX_SDB));
}
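
/* Worked example with hypothetical numbers (illustration only): with the
 * basic entry size bsdes = 32, one 4K SDB holds (4096 - 64) / 32 = 126
 * samples. If sample_rate_to_freq() reports, say, 10000 samples per second
 * for the configured interval, the calculation above requests
 * DIV_ROUND_UP(10000, 126) = 80 SDBs to cover roughly one second of data;
 * a lower frequency would be raised to CPUM_SF_MIN_SDB, and the request is
 * finally capped at CPUM_SF_MAX_SDB.
 */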


/* Number of perf events counting hardware events */
static atomic_t num_events;
/* Used to avoid races in calling reserve/release_pmc_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);

/*
 * sf_disable() - Switch off sampling facility
 */
static int sf_disable(void)
{
        struct hws_lsctl_request_block sreq;

        memset(&sreq, 0, sizeof(sreq));
        return lsctl(&sreq);
}


#define PMC_INIT    0
#define PMC_RELEASE 1
static void setup_pmc_cpu(void *flags)
{
        int err;
        struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf);

        /* XXX Improve error handling and pass a flag in the *flags
         * variable to indicate failures. Alternatively, ignore
         * (print) errors here and let the PMU functions fail if
         * the per-cpu PMU_F_RESERVED flag is not set.
         */
        err = 0;
        switch (*((int *) flags)) {
        case PMC_INIT:
                memset(cpusf, 0, sizeof(*cpusf));
                err = qsi(&cpusf->qsi);
                if (err)
                        break;
                cpusf->flags |= PMU_F_RESERVED;
                err = sf_disable();
                if (err)
                        pr_err("Switching off the sampling facility failed "
                               "with rc=%i\n", err);
                debug_sprintf_event(sfdbg, 5,
                                    "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
                break;
        case PMC_RELEASE:
                cpusf->flags &= ~PMU_F_RESERVED;
                err = sf_disable();
                if (err) {
                        pr_err("Switching off the sampling facility failed "
                               "with rc=%i\n", err);
                } else {
                        if (cpusf->sfb.sdbt)
                                free_sampling_buffer(&cpusf->sfb);
                }
                debug_sprintf_event(sfdbg, 5,
                                    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
                break;
        }
}

static void release_pmc_hardware(void)
{
        int flags = PMC_RELEASE;

        irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
        on_each_cpu(setup_pmc_cpu, &flags, 1);
}

static int reserve_pmc_hardware(void)
{
        int flags = PMC_INIT;

        on_each_cpu(setup_pmc_cpu, &flags, 1);
        irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);

        return 0;
}

static void hw_perf_event_destroy(struct perf_event *event)
{
        /* Release PMC if this is the last perf event */
        if (!atomic_add_unless(&num_events, -1, 1)) {
                mutex_lock(&pmc_reserve_mutex);
                if (atomic_dec_return(&num_events) == 0)
                        release_pmc_hardware();
                mutex_unlock(&pmc_reserve_mutex);
        }
}

static void hw_init_period(struct hw_perf_event *hwc, u64 period)
{
        hwc->sample_period = period;
        hwc->last_period = hwc->sample_period;
        local64_set(&hwc->period_left, hwc->sample_period);
}

static void hw_reset_registers(struct hw_perf_event *hwc,
                               unsigned long sdbt_origin)
{
        TEAR_REG(hwc) = sdbt_origin;    /* (re)set to first sdb table */
}

static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
                                   unsigned long rate)
{
        if (rate < si->min_sampl_rate)
                return si->min_sampl_rate;
        if (rate > si->max_sampl_rate)
                return si->max_sampl_rate;
        return rate;
}

static int __hw_perf_event_init(struct perf_event *event)
{
        struct cpu_hw_sf *cpuhw;
        struct hws_qsi_info_block si;
        struct perf_event_attr *attr = &event->attr;
        struct hw_perf_event *hwc = &event->hw;
        unsigned long rate;
        int cpu, err;

        /* Reserve CPU-measurement sampling facility */
        err = 0;
        if (!atomic_inc_not_zero(&num_events)) {
                mutex_lock(&pmc_reserve_mutex);
                if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
                        err = -EBUSY;
                else
                        atomic_inc(&num_events);
                mutex_unlock(&pmc_reserve_mutex);
        }
        event->destroy = hw_perf_event_destroy;

        if (err)
                goto out;

        /* Access per-CPU sampling information (query sampling info) */
        /*
         * The event->cpu value can be -1 to count on every CPU, for example,
         * when attaching to a task. If this is specified, use the query
         * sampling info from the current CPU, otherwise use event->cpu to
         * retrieve the per-CPU information.
         * Later, cpuhw indicates whether to allocate sampling buffers for a
         * particular CPU (cpuhw!=NULL) or each online CPU (cpuhw==NULL).
         */
        memset(&si, 0, sizeof(si));
        cpuhw = NULL;
        if (event->cpu == -1)
                qsi(&si);
        else {
                /* Event is pinned to a particular CPU, retrieve the per-CPU
                 * sampling structure for accessing the CPU-specific QSI.
                 */
                cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
                si = cpuhw->qsi;
        }

        /* Check sampling facility authorization and, if not authorized,
         * fall back to other PMUs. It is safe to check any CPU because
         * the authorization is identical for all configured CPUs.
         */
        if (!si.as) {
                err = -ENOENT;
                goto out;
        }

        /* The sampling information (si) contains details about the
         * min/max sampling intervals and the CPU speed. So calculate the
         * correct sampling interval and avoid the whole period adjust
         * feedback loop.
         */
        rate = 0;
        if (attr->freq) {
                rate = freq_to_sample_rate(&si, attr->sample_freq);
                rate = hw_limit_rate(&si, rate);
                attr->freq = 0;
                attr->sample_period = rate;
        } else {
                /* The min/max sampling rates specify the valid range
                 * of sample periods. If the specified sample period is
                 * out of range, limit the period to the range boundary.
                 */
                rate = hw_limit_rate(&si, hwc->sample_period);

                /* The perf core maintains a maximum sample rate that is
                 * configurable through the sysctl interface. Ensure the
                 * sampling rate does not exceed this value. This also helps
                 * to avoid throttling when pushing samples with
                 * perf_event_overflow().
                 */
                if (sample_rate_to_freq(&si, rate) >
                    sysctl_perf_event_sample_rate) {
                        err = -EINVAL;
                        debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
                        goto out;
                }
        }
        SAMPL_RATE(hwc) = rate;
        hw_init_period(hwc, SAMPL_RATE(hwc));

        /* Allocate the per-CPU sampling buffer using the CPU information
         * from the event. If the event is not pinned to a particular
         * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
         * buffers for each online CPU.
         */
        if (cpuhw)
                /* Event is pinned to a particular CPU */
                err = allocate_sdbt(cpuhw, hwc);
        else {
                /* Event is not pinned, allocate sampling buffer on
                 * each online CPU
                 */
                for_each_online_cpu(cpu) {
                        cpuhw = &per_cpu(cpu_hw_sf, cpu);
                        err = allocate_sdbt(cpuhw, hwc);
                        if (err)
                                break;
                }
        }
out:
        return err;
}

static int cpumsf_pmu_event_init(struct perf_event *event)
{
        int err;

        if (event->attr.type != PERF_TYPE_RAW)
                return -ENOENT;

        if (event->attr.config != PERF_EVENT_CPUM_SF)
                return -ENOENT;

        if (event->cpu >= nr_cpumask_bits ||
            (event->cpu >= 0 && !cpu_online(event->cpu)))
                return -ENODEV;

        err = __hw_perf_event_init(event);
        if (unlikely(err))
                if (event->destroy)
                        event->destroy(event);
        return err;
}
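
/* Usage sketch (illustrative only, not part of the original sources): a
 * sampling event for this PMU is opened with a raw attribute whose config
 * selects the basic-sampling event, along the lines of
 *
 *      struct perf_event_attr attr = {
 *              .size          = sizeof(attr),
 *              .type          = PERF_TYPE_RAW,
 *              .config        = PERF_EVENT_CPUM_SF,
 *              .sample_period = 10000,
 *      };
 *
 * or, alternatively, with attr.freq = 1 and attr.sample_freq set; in that
 * case __hw_perf_event_init() converts the frequency into a raw sampling
 * interval via freq_to_sample_rate() and clears attr->freq again.
 */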

static void cpumsf_pmu_enable(struct pmu *pmu)
{
        struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
        int err;

        if (cpuhw->flags & PMU_F_ENABLED)
                return;

        if (cpuhw->flags & PMU_F_ERR_MASK)
                return;

        cpuhw->flags |= PMU_F_ENABLED;
        barrier();

        err = lsctl(&cpuhw->lsctl);
        if (err) {
                cpuhw->flags &= ~PMU_F_ENABLED;
                pr_err("Loading sampling controls failed: op=%i err=%i\n",
                       1, err);
                return;
        }

        debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n",
                            cpuhw->lsctl.es, cpuhw->lsctl.cs,
                            (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
}

static void cpumsf_pmu_disable(struct pmu *pmu)
{
        struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
        struct hws_lsctl_request_block inactive;
        struct hws_qsi_info_block si;
        int err;

        if (!(cpuhw->flags & PMU_F_ENABLED))
                return;

        if (cpuhw->flags & PMU_F_ERR_MASK)
                return;

        /* Switch off sampling activation control */
        inactive = cpuhw->lsctl;
        inactive.cs = 0;

        err = lsctl(&inactive);
        if (err) {
                pr_err("Loading sampling controls failed: op=%i err=%i\n",
                       2, err);
                return;
        }

        /* Save state of TEAR and DEAR register contents */
        if (!qsi(&si)) {
                /* TEAR/DEAR values are valid only if the sampling facility is
                 * enabled. Note that cpumsf_pmu_disable() might be called even
                 * for a disabled sampling facility because cpumsf_pmu_enable()
                 * controls the enable/disable state.
                 */
                if (si.es) {
                        cpuhw->lsctl.tear = si.tear;
                        cpuhw->lsctl.dear = si.dear;
                }
        } else
                debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
                                    "qsi() failed with err=%i\n", err);

        cpuhw->flags &= ~PMU_F_ENABLED;
}

/* perf_push_sample() - Push samples to perf
 * @event:  The perf event
 * @sample: Hardware sample data
 *
 * Use the hardware sample data to create a perf event sample. The sample
 * is then pushed to the perf event subsystem and the function checks for
 * possible event overflows. If an event overflow occurs, the PMU is
 * stopped.
 *
 * Return non-zero if an event overflow occurred.
 */
static int perf_push_sample(struct perf_event *event,
                            struct hws_data_entry *sample)
{
        int overflow;
        struct pt_regs regs;
        struct perf_sample_data data;

        /* Skip samples that are invalid or for which the instruction address
         * is not predictable. For the latter, the wait-state bit is set.
         */
        if (sample->I || sample->W)
                return 0;

        perf_sample_data_init(&data, 0, event->hw.last_period);

        memset(&regs, 0, sizeof(regs));
        regs.psw.addr = sample->ia;
        if (sample->T)
                regs.psw.mask |= PSW_MASK_DAT;
        if (sample->W)
                regs.psw.mask |= PSW_MASK_WAIT;
        if (sample->P)
                regs.psw.mask |= PSW_MASK_PSTATE;
        switch (sample->AS) {
        case 0x0:
                regs.psw.mask |= PSW_ASC_PRIMARY;
                break;
        case 0x1:
                regs.psw.mask |= PSW_ASC_ACCREG;
                break;
        case 0x2:
                regs.psw.mask |= PSW_ASC_SECONDARY;
                break;
        case 0x3:
                regs.psw.mask |= PSW_ASC_HOME;
                break;
        }

        overflow = 0;
        if (perf_event_overflow(event, &data, &regs)) {
                overflow = 1;
                event->pmu->stop(event, 0);
                debug_sprintf_event(sfdbg, 4, "perf_push_sample: PMU stopped"
                                    " because of an event overflow\n");
        }
        perf_event_update_userpage(event);

        return overflow;
}

static void perf_event_count_update(struct perf_event *event, u64 count)
{
        local64_add(count, &event->count);
}

/* hw_collect_samples() - Walk through a sample-data-block and collect samples
 * @event:    The perf event
 * @sdbt:     Sample-data-block table
 * @overflow: Event overflow counter
 *
 * Walks through a sample-data-block and collects hardware sample-data that is
 * pushed to the perf event subsystem. The overflow counter reports the number
 * of samples that have been discarded due to an event overflow.
 */
static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
                               unsigned long long *overflow)
{
        struct hws_data_entry *sample;
        unsigned long *trailer;

        trailer = trailer_entry_ptr(*sdbt);
        sample = (struct hws_data_entry *) *sdbt;
        while ((unsigned long *) sample < trailer) {
                /* Check for an empty sample */
                if (!sample->def)
                        break;

                /* Update perf event period */
                perf_event_count_update(event, SAMPL_RATE(&event->hw));

                /* Check for basic sampling mode */
                if (sample->def == 0x0001) {
                        /* If an event overflow occurred, the PMU is stopped to
                         * throttle event delivery. Remaining sample data is
                         * discarded.
                         */
                        if (!*overflow)
                                *overflow = perf_push_sample(event, sample);
                        else
                                /* Count discarded samples */
                                *overflow += 1;
                } else
                        /* Sample slot is not yet written or other record */
                        debug_sprintf_event(sfdbg, 5, "hw_collect_samples: "
                                            "Unknown sample data entry format:"
                                            " %i\n", sample->def);

                /* Reset sample slot and advance to next sample */
                sample->def = 0;
                sample++;
        }
}
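
/* Note on the walk above (an explanatory sketch, not original commentary):
 * a sample-data-block is one 4K page that starts with a sequence of
 * fixed-size basic sample entries (struct hws_data_entry) and ends with the
 * trailer entry returned by trailer_entry_ptr(). An entry whose "def"
 * field is zero has not been written yet, 0x0001 identifies the basic
 * sampling format handled here, and clearing "def" afterwards marks the
 * slot as consumed so the block can be reused once its trailer is reset in
 * hw_perf_event_update().
 */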

/* hw_perf_event_update() - Process sampling buffer
 * @event:     The perf event
 * @flush_all: Flag to also flush partially filled sample-data-blocks
 *
 * Processes the sampling buffer and creates perf event samples.
 * The sampling buffer position is retrieved and saved in the TEAR_REG
 * register of the specified perf event.
 *
 * Only full sample-data-blocks are processed. Specify the flush_all flag
 * to also walk through partially filled sample-data-blocks.
 *
 */
static void hw_perf_event_update(struct perf_event *event, int flush_all)
{
        struct hw_perf_event *hwc = &event->hw;
        struct hws_trailer_entry *te;
        unsigned long *sdbt;
        unsigned long long event_overflow, sampl_overflow;
        int done;

        sdbt = (unsigned long *) TEAR_REG(hwc);
        done = event_overflow = sampl_overflow = 0;
        while (!done) {
                /* Get the trailer entry of the sample-data-block */
                te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);

                /* Leave loop if no more work to do (block full indicator) */
                if (!te->f) {
                        done = 1;
                        if (!flush_all)
                                break;
                }

                /* Check sample overflow count */
                if (te->overflow) {
                        /* Increment sample overflow counter */
                        sampl_overflow += te->overflow;

                        /* XXX: If a sample overflow occurs, increase the
                         *      sampling buffer. Set a "realloc" flag because
                         *      the sampler must be re-enabled for changing
                         *      the sample-data-block-table content.
                         */
                }

                /* Timestamps are valid for full sample-data-blocks only */
                debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
                                    "overflow=%llu timestamp=0x%llx\n",
                                    sdbt, te->overflow,
                                    (te->f) ? te->timestamp : 0ULL);

                /* Collect all samples from a single sample-data-block and
                 * flag if a (perf) event overflow happened. If so, the PMU
                 * is stopped and remaining samples will be discarded.
                 */
                hw_collect_samples(event, sdbt, &event_overflow);

                /* Reset trailer */
                xchg(&te->overflow, 0);
                xchg((unsigned char *) te, 0x40);

                /* Advance to next sample-data-block */
                sdbt++;
                if (is_link_entry(sdbt))
                        sdbt = get_next_sdbt(sdbt);

                /* Update event hardware registers */
                TEAR_REG(hwc) = (unsigned long) sdbt;

                /* Stop processing sample-data if all samples of the current
                 * sample-data-block were flushed even if it was not full.
                 */
                if (flush_all && done)
                        break;

                /* If an event overflow happened, discard samples by
                 * processing any remaining sample-data-blocks.
                 */
                if (event_overflow)
                        flush_all = 1;
        }

        if (sampl_overflow || event_overflow)
                debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
                                    "overflow stats: sample=%llu event=%llu\n",
                                    sampl_overflow, event_overflow);
}
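
/* Overall flow, summarized for orientation (explanatory note, not original
 * commentary): the sampling hardware fills SDBs and, because each SDB
 * trailer carries the alert-request bit set at allocation time, signals a
 * measurement alert (CPU_MF_INT_SF_PRA) once a block is full.
 * cpumf_measurement_alert() then calls hw_perf_event_update(), which walks
 * full blocks starting at the position remembered in TEAR_REG, pushes each
 * sample through perf_push_sample(), resets the trailer so the block can be
 * reused, and stores the new position back into TEAR_REG for the next
 * interrupt.
 */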

static void cpumsf_pmu_read(struct perf_event *event)
{
        /* Nothing to do ... updates are interrupt-driven */
}

/* Activate sampling control.
 * Next call of pmu_enable() starts sampling.
 */
static void cpumsf_pmu_start(struct perf_event *event, int flags)
{
        struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

        if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
                return;

        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

        perf_pmu_disable(event->pmu);
        event->hw.state = 0;
        cpuhw->lsctl.cs = 1;
        perf_pmu_enable(event->pmu);
}

/* Deactivate sampling control.
 * Next call of pmu_enable() stops sampling.
 */
static void cpumsf_pmu_stop(struct perf_event *event, int flags)
{
        struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

        if (event->hw.state & PERF_HES_STOPPED)
                return;

        perf_pmu_disable(event->pmu);
        cpuhw->lsctl.cs = 0;
        event->hw.state |= PERF_HES_STOPPED;

        if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
                hw_perf_event_update(event, 1);
                event->hw.state |= PERF_HES_UPTODATE;
        }
        perf_pmu_enable(event->pmu);
}

static int cpumsf_pmu_add(struct perf_event *event, int flags)
{
        struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
        int err;

        if (cpuhw->flags & PMU_F_IN_USE)
                return -EAGAIN;

        if (!cpuhw->sfb.sdbt)
                return -EINVAL;

        err = 0;
        perf_pmu_disable(event->pmu);

        event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        /* Set up sampling controls. Always program the sampling register
         * using the SDB-table start. Reset TEAR_REG event hardware register
         * that is used by hw_perf_event_update() to store the sampling buffer
         * position after samples have been flushed.
         */
        cpuhw->lsctl.s = 0;
        cpuhw->lsctl.h = 1;
        cpuhw->lsctl.tear = cpuhw->sfb.sdbt;
        cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
        cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
        hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);

        /* Ensure sampling functions are in the disabled state. If disabled,
         * switch on sampling enable control. */
        if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) {
                err = -EAGAIN;
                goto out;
        }
        cpuhw->lsctl.es = 1;

        /* Set in_use flag and store event */
        event->hw.idx = 0;        /* only one sampling event per CPU supported */
        cpuhw->event = event;
        cpuhw->flags |= PMU_F_IN_USE;

        if (flags & PERF_EF_START)
                cpumsf_pmu_start(event, PERF_EF_RELOAD);
out:
        perf_event_update_userpage(event);
        perf_pmu_enable(event->pmu);
        return err;
}

static void cpumsf_pmu_del(struct perf_event *event, int flags)
{
        struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

        perf_pmu_disable(event->pmu);
        cpumsf_pmu_stop(event, PERF_EF_UPDATE);

        cpuhw->lsctl.es = 0;
        cpuhw->flags &= ~PMU_F_IN_USE;
        cpuhw->event = NULL;

        perf_event_update_userpage(event);
        perf_pmu_enable(event->pmu);
}

static int cpumsf_pmu_event_idx(struct perf_event *event)
{
        return event->hw.idx;
}

CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);

static struct attribute *cpumsf_pmu_events_attr[] = {
        CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
        NULL,
};

PMU_FORMAT_ATTR(event, "config:0-63");

static struct attribute *cpumsf_pmu_format_attr[] = {
        &format_attr_event.attr,
        NULL,
};

static struct attribute_group cpumsf_pmu_events_group = {
        .name = "events",
        .attrs = cpumsf_pmu_events_attr,
};
static struct attribute_group cpumsf_pmu_format_group = {
        .name = "format",
        .attrs = cpumsf_pmu_format_attr,
};
static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
        &cpumsf_pmu_events_group,
        &cpumsf_pmu_format_group,
        NULL,
};

static struct pmu cpumf_sampling = {
        .pmu_enable   = cpumsf_pmu_enable,
        .pmu_disable  = cpumsf_pmu_disable,

        .event_init   = cpumsf_pmu_event_init,
        .add          = cpumsf_pmu_add,
        .del          = cpumsf_pmu_del,

        .start        = cpumsf_pmu_start,
        .stop         = cpumsf_pmu_stop,
        .read         = cpumsf_pmu_read,

        .event_idx    = cpumsf_pmu_event_idx,
        .attr_groups  = cpumsf_pmu_attr_groups,
};
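
/* With these attribute groups, registration below under the name "cpum_sf"
 * makes the event and format descriptions visible in sysfs, roughly as
 * (illustrative paths, assuming the usual perf event_source layout):
 *
 *      /sys/bus/event_source/devices/cpum_sf/events/SF_CYCLES_BASIC
 *      /sys/bus/event_source/devices/cpum_sf/format/event
 *
 * so the perf tool can refer to the sampling event symbolically, e.g.
 * "perf record -e cpum_sf/SF_CYCLES_BASIC/ ..." instead of using the raw
 * config value behind PERF_EVENT_CPUM_SF.
 */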

static void cpumf_measurement_alert(struct ext_code ext_code,
                                    unsigned int alert, unsigned long unused)
{
        struct cpu_hw_sf *cpuhw;

        if (!(alert & CPU_MF_INT_SF_MASK))
                return;
        inc_irq_stat(IRQEXT_CMS);
        cpuhw = &__get_cpu_var(cpu_hw_sf);

        /* Measurement alerts are shared and might happen when the PMU
         * is not reserved. Ignore these alerts in this case. */
        if (!(cpuhw->flags & PMU_F_RESERVED))
                return;

        /* The processing below must take care of multiple alert events that
         * might be indicated concurrently. */

        /* Program alert request */
        if (alert & CPU_MF_INT_SF_PRA) {
                if (cpuhw->flags & PMU_F_IN_USE)
                        hw_perf_event_update(cpuhw->event, 0);
                else
                        WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
        }

        /* Report measurement alerts only for non-PRA codes */
        if (alert != CPU_MF_INT_SF_PRA)
                debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);

        /* Sampling authorization change request */
        if (alert & CPU_MF_INT_SF_SACA)
                qsi(&cpuhw->qsi);

        /* Loss of sample data due to high-priority machine activities */
        if (alert & CPU_MF_INT_SF_LSDA) {
                pr_err("Sample data was lost\n");
                cpuhw->flags |= PMU_F_ERR_LSDA;
                sf_disable();
        }

        /* Invalid sampling buffer entry */
        if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
                pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
                       alert);
                cpuhw->flags |= PMU_F_ERR_IBE;
                sf_disable();
        }
}

static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
                                        unsigned long action, void *hcpu)
{
        unsigned int cpu = (long) hcpu;
        int flags;

        /* Ignore the notification if no events are scheduled on the PMU.
         * This might be racy...
         */
        if (!atomic_read(&num_events))
                return NOTIFY_OK;

        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                flags = PMC_INIT;
                smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
                break;
        case CPU_DOWN_PREPARE:
                flags = PMC_RELEASE;
                smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
                break;
        default:
                break;
        }

        return NOTIFY_OK;
}

static int __init init_cpum_sampling_pmu(void)
{
        int err;

        if (!cpum_sf_avail())
                return -ENODEV;

        sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
        if (!sfdbg)
                pr_err("Registering for s390dbf failed\n");
        debug_register_view(sfdbg, &debug_sprintf_view);

        err = register_external_interrupt(0x1407, cpumf_measurement_alert);
        if (err) {
                pr_err("Failed to register for CPU-measurement alerts\n");
                goto out;
        }

        err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
        if (err) {
                pr_err("Failed to register cpum_sf pmu\n");
                unregister_external_interrupt(0x1407, cpumf_measurement_alert);
                goto out;
        }
        perf_cpu_notifier(cpumf_pmu_notifier);
out:
        return err;
}
arch_initcall(init_cpum_sampling_pmu);