/*
 * Performance event support for the System z CPU-measurement Sampling Facility
 *
 * Copyright IBM Corp. 2013
 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 */
#define KMSG_COMPONENT	"cpum_sf"
#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <asm/cpu_mf.h>
#include <asm/irq.h>
#include <asm/debug.h>
#include <asm/timex.h>

/* Minimum number of sample-data-block-tables:
 * At least one table is required for the sampling buffer structure.
 * A single table contains up to 511 pointers to sample-data-blocks.
 */
#define CPUM_SF_MIN_SDBT	1

/* Minimum number of sample-data-blocks:
 * Each sample-data-block occupies a single page and holds up to 126
 * samples with a basic sample-data-entry size (bsdes) of 32 bytes.
 */
#define CPUM_SF_MIN_SDB		126

/* Maximum number of sample-data-blocks:
 * The maximum number designates approx. 256K per CPU including
 * the given number of sample-data-blocks and taking the number
 * of sample-data-block tables into account.
 *
 * Later, this number can be increased for extending the sampling
 * buffer, for example, by factor 2 (512K) or 4 (1M).
 */
#define CPUM_SF_MAX_SDB		6471

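/* The sampling buffer consists of one or more sample-data-block-tables
 * (SDBTs) that are chained through link entries: an entry with the
 * low-order bit set points to the next table, and the last table links
 * back to the origin.  All remaining entries point to sample-data-blocks
 * (SDBs), each occupying one 4K page.
 */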
struct sf_buffer {
	unsigned long	sdbt;	  /* Sample-data-block-table origin */
	/* buffer characteristics (required for buffer increments) */
	unsigned long	num_sdb;  /* Number of sample-data-blocks */
	unsigned long	tail;	  /* last sample-data-block-table */
};

struct cpu_hw_sf {
	/* CPU-measurement sampling information block */
	struct hws_qsi_info_block qsi;
	struct hws_lsctl_request_block lsctl;
	struct sf_buffer sfb;	  /* Sampling buffer */
	unsigned int flags;	  /* Status flags */
	struct perf_event *event; /* Scheduled perf event */
};
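/* Per-CPU sampling state; only one sampling event per CPU is supported
 * at a time (see cpumsf_pmu_add()).
 */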
static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);

/* Debug feature */
static debug_info_t *sfdbg;

/*
 * sf_buffer_available() - Check for an allocated sampling buffer
 */
static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
{
	return (cpuhw->sfb.sdbt) ? 1 : 0;
}

/*
 * deallocate sampling facility buffer
 */
static void free_sampling_buffer(struct sf_buffer *sfb)
{
	unsigned long sdbt, *curr;

	if (!sfb->sdbt)
		return;

	sdbt = sfb->sdbt;
	curr = (unsigned long *) sdbt;

	/* we'll free the SDBT after all SDBs are processed... */
	while (1) {
		if (!*curr || !sdbt)
			break;

		/* watch for link entry reset if found */
		if (is_link_entry(curr)) {
			curr = get_next_sdbt(curr);
			if (sdbt)
				free_page(sdbt);

			/* we are done if we reach the origin */
			if ((unsigned long) curr == sfb->sdbt)
				break;
			else
				sdbt = (unsigned long) curr;
		} else {
			/* process SDB pointer */
			if (*curr) {
				free_page(*curr);
				curr++;
			}
		}
	}

	debug_sprintf_event(sfdbg, 5,
			    "free_sampling_buffer: freed sdbt=%0lx\n", sfb->sdbt);
	memset(sfb, 0, sizeof(*sfb));
}

/*
 * alloc_sampling_buffer() - allocate sampler memory
 *
 * Allocates and initializes a sampling buffer structure using the
 * specified number of sample-data-blocks (SDB).  For each allocation,
 * a 4K page is used.  The number of sample-data-block-tables (SDBT)
 * is calculated from the number of SDBs.
 * Also sets the ALERT_REQ mask in each SDB's trailer.
 *
 * Returns zero on success, non-zero otherwise.
 */
static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
{
	int j, k, rc;
	unsigned long *sdbt, *tail, *trailer;
	unsigned long sdb;
	unsigned long num_sdbt, sdb_per_table;

	if (sfb->sdbt)
		return -EINVAL;
	sfb->num_sdb = 0;

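	/* A sample-data-block-table (SDBT) occupies one 4K page holding 512
	 * eight-byte entries; the last entry links to the next table, which
	 * leaves up to 511 SDB pointers per table.
	 */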
	/* Compute the number of required sample-data-block-tables (SDBT) */
	sdb_per_table = (PAGE_SIZE - 8) / 8;
	num_sdbt = num_sdb / sdb_per_table;
	if (num_sdbt < CPUM_SF_MIN_SDBT)
		num_sdbt = CPUM_SF_MIN_SDBT;

	debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: num_sdbt=%lu "
			    "num_sdb=%lu sdb_per_table=%lu\n",
			    num_sdbt, num_sdb, sdb_per_table);
	sdbt = NULL;
	tail = sdbt;

	for (j = 0; j < num_sdbt; j++) {
		sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
		if (!sdbt) {
			rc = -ENOMEM;
			goto allocate_sdbt_error;
		}

		/* save origin of sample-data-block-table */
		if (!sfb->sdbt)
			sfb->sdbt = (unsigned long) sdbt;

		/* link current page to tail of chain */
		if (tail)
			*tail = (unsigned long)(void *) sdbt + 1;

		for (k = 0; k < num_sdb && k < sdb_per_table; k++) {
			/* get and set SDB page */
			sdb = get_zeroed_page(GFP_KERNEL);
			if (!sdb) {
				rc = -ENOMEM;
				goto allocate_sdbt_error;
			}
			*sdbt = sdb;
			trailer = trailer_entry_ptr(*sdbt);
			*trailer = SDB_TE_ALERT_REQ_MASK;
			sdbt++;
		}
		num_sdb -= k;
		sfb->num_sdb += k;	/* count allocated sdb's */
		tail = sdbt;
	}

	rc = 0;
	if (tail)
		*tail = sfb->sdbt + 1;
	sfb->tail = (unsigned long) (void *)tail;

allocate_sdbt_error:
	if (rc)
		free_sampling_buffer(sfb);
	else
		debug_sprintf_event(sfdbg, 4,
			"alloc_sampling_buffer: tear=%0lx dear=%0lx\n",
			sfb->sdbt, *(unsigned long *) sfb->sdbt);
	return rc;
}

static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hwc)
{
	unsigned long n_sdb, freq;
	unsigned long factor;

	/* Calculate sampling buffers using 4K pages
	 *
	 * 1. Use frequency as input.  The sampling buffer is designed for
	 *    a complete second.  This can be adjusted through the "factor"
	 *    variable.
	 *    In any case, alloc_sampling_buffer() sets the Alert Request
	 *    Control indicator to trigger a measurement-alert to harvest
	 *    sample-data-blocks (sdb).
	 *
	 * 2. Compute the number of sample-data-blocks and ensure a minimum
	 *    of CPUM_SF_MIN_SDB.  Also ensure the upper limit does not
	 *    exceed CPUM_SF_MAX_SDB.  See also the remarks for these
	 *    symbolic constants.
	 *
	 * 3. Compute the number of pages used for the sample-data-block-table
	 *    and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table
	 *    to manage up to 511 sample-data-blocks).
	 */
	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
	factor = 1;
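	/* Each 4K SDB provides (PAGE_SIZE - 64) / bsdes sample slots; the
	 * trailing 64 bytes are occupied by the trailer entry.  Size the
	 * buffer so that roughly one second of samples fits.
	 */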
	n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE - 64) / cpuhw->qsi.bsdes));
	if (n_sdb < CPUM_SF_MIN_SDB)
		n_sdb = CPUM_SF_MIN_SDB;

	/* Return if there is already a sampling buffer allocated.
	 * XXX Remove this later and check number of available and
	 * required sdb's and, if necessary, increase the sampling buffer.
	 */
	if (sf_buffer_available(cpuhw))
		return 0;

	debug_sprintf_event(sfdbg, 3,
			    "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%i cpuhw=%p\n",
			    SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw);

	return alloc_sampling_buffer(&cpuhw->sfb,
				     min_t(unsigned long, n_sdb, CPUM_SF_MAX_SDB));
}


/* Number of perf events counting hardware events */
static atomic_t num_events;
/* Used to avoid races in calling reserve/release_cpumf_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);

/*
 * sf_disable() - Switch off sampling facility
 */
static int sf_disable(void)
{
	struct hws_lsctl_request_block sreq;

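	/* An all-zero request block clears the enable (es) and activation
	 * (cs) controls and thereby switches sampling off.
	 */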
	memset(&sreq, 0, sizeof(sreq));
	return lsctl(&sreq);
}


#define PMC_INIT      0
#define PMC_RELEASE   1
#define PMC_FAILURE   2
static void setup_pmc_cpu(void *flags)
{
	int err;
	struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf);

	err = 0;
	switch (*((int *) flags)) {
	case PMC_INIT:
		memset(cpusf, 0, sizeof(*cpusf));
		err = qsi(&cpusf->qsi);
		if (err)
			break;
		cpusf->flags |= PMU_F_RESERVED;
		err = sf_disable();
		if (err)
			pr_err("Switching off the sampling facility failed "
			       "with rc=%i\n", err);
		debug_sprintf_event(sfdbg, 5,
				    "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
		break;
	case PMC_RELEASE:
		cpusf->flags &= ~PMU_F_RESERVED;
		err = sf_disable();
		if (err) {
			pr_err("Switching off the sampling facility failed "
			       "with rc=%i\n", err);
		} else {
			if (cpusf->sfb.sdbt)
				free_sampling_buffer(&cpusf->sfb);
		}
		debug_sprintf_event(sfdbg, 5,
				    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
		break;
	}
	if (err)
		*((int *) flags) |= PMC_FAILURE;
}

static void release_pmc_hardware(void)
{
	int flags = PMC_RELEASE;

	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
	on_each_cpu(setup_pmc_cpu, &flags, 1);
	perf_release_sampling();
}

static int reserve_pmc_hardware(void)
{
	int flags = PMC_INIT;
	int err;

	err = perf_reserve_sampling();
	if (err)
		return err;
	on_each_cpu(setup_pmc_cpu, &flags, 1);
	if (flags & PMC_FAILURE) {
		release_pmc_hardware();
		return -ENODEV;
	}
	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);

	return 0;
}

static void hw_perf_event_destroy(struct perf_event *event)
{
	/* Release PMC if this is the last perf event */
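	/* atomic_add_unless() fails only if the counter is 1, that is, if
	 * this is the last event; only then take the mutex-protected slow
	 * path to serialize against reserve_pmc_hardware().
	 */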
	if (!atomic_add_unless(&num_events, -1, 1)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_dec_return(&num_events) == 0)
			release_pmc_hardware();
		mutex_unlock(&pmc_reserve_mutex);
	}
}

static void hw_init_period(struct hw_perf_event *hwc, u64 period)
{
	hwc->sample_period = period;
	hwc->last_period = hwc->sample_period;
	local64_set(&hwc->period_left, hwc->sample_period);
}

static void hw_reset_registers(struct hw_perf_event *hwc,
			       unsigned long sdbt_origin)
{
	TEAR_REG(hwc) = sdbt_origin;	/* (re)set to first sdb table */
}

static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
				   unsigned long rate)
{
	if (rate < si->min_sampl_rate)
		return si->min_sampl_rate;
	if (rate > si->max_sampl_rate)
		return si->max_sampl_rate;
	return rate;
}

static int __hw_perf_event_init(struct perf_event *event)
{
	struct cpu_hw_sf *cpuhw;
	struct hws_qsi_info_block si;
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	unsigned long rate;
	int cpu, err;

	/* Reserve CPU-measurement sampling facility */
	err = 0;
	if (!atomic_inc_not_zero(&num_events)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
			err = -EBUSY;
		else
			atomic_inc(&num_events);
		mutex_unlock(&pmc_reserve_mutex);
	}
	event->destroy = hw_perf_event_destroy;

	if (err)
		goto out;

	/* Access per-CPU sampling information (query sampling info)
	 *
	 * The event->cpu value can be -1 to count on every CPU, for example,
	 * when attaching to a task.  If this is specified, use the query
	 * sampling info from the current CPU, otherwise use event->cpu to
	 * retrieve the per-CPU information.
	 * Later, cpuhw indicates whether to allocate sampling buffers for a
	 * particular CPU (cpuhw != NULL) or for each online CPU
	 * (cpuhw == NULL).
	 */
	memset(&si, 0, sizeof(si));
	cpuhw = NULL;
	if (event->cpu == -1)
		qsi(&si);
	else {
		/* Event is pinned to a particular CPU, retrieve the per-CPU
		 * sampling structure for accessing the CPU-specific QSI.
		 */
		cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
		si = cpuhw->qsi;
	}

	/* Check sampling facility authorization and, if not authorized,
	 * fall back to other PMUs.  It is safe to check any CPU because
	 * the authorization is identical for all configured CPUs.
	 */
	if (!si.as) {
		err = -ENOENT;
		goto out;
	}

	/* The sampling information (si) contains information about the
	 * min/max sampling intervals and the CPU speed.  So calculate the
	 * correct sampling interval and avoid the whole period adjust
	 * feedback loop.
	 */
	rate = 0;
	if (attr->freq) {
		rate = freq_to_sample_rate(&si, attr->sample_freq);
		rate = hw_limit_rate(&si, rate);
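		/* Turn the frequency-based event into a fixed-period event;
		 * the hardware interval is programmed directly, so the perf
		 * core's period-adjustment feedback loop is not needed.
		 */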
		attr->freq = 0;
		attr->sample_period = rate;
	} else {
		/* The min/max sampling rates specify the valid range
		 * of sample periods.  If the specified sample period is
		 * out of range, limit the period to the range boundary.
		 */
		rate = hw_limit_rate(&si, hwc->sample_period);

		/* The perf core maintains a maximum sample rate that is
		 * configurable through the sysctl interface.  Ensure the
		 * sampling rate does not exceed this value.  This also helps
		 * to avoid throttling when pushing samples with
		 * perf_event_overflow().
		 */
		if (sample_rate_to_freq(&si, rate) >
		    sysctl_perf_event_sample_rate) {
			err = -EINVAL;
			debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
			goto out;
		}
	}
	SAMPL_RATE(hwc) = rate;
	hw_init_period(hwc, SAMPL_RATE(hwc));

	/* Allocate the per-CPU sampling buffer using the CPU information
	 * from the event.  If the event is not pinned to a particular
	 * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
	 * buffers for each online CPU.
	 */
	if (cpuhw)
		/* Event is pinned to a particular CPU */
		err = allocate_sdbt(cpuhw, hwc);
	else {
		/* Event is not pinned, allocate sampling buffer on
		 * each online CPU
		 */
		for_each_online_cpu(cpu) {
			cpuhw = &per_cpu(cpu_hw_sf, cpu);
			err = allocate_sdbt(cpuhw, hwc);
			if (err)
				break;
		}
	}
out:
	return err;
}

static int cpumsf_pmu_event_init(struct perf_event *event)
{
	int err;

	/* No support for taken branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	switch (event->attr.type) {
	case PERF_TYPE_RAW:
		if (event->attr.config != PERF_EVENT_CPUM_SF)
			return -ENOENT;
		break;
	case PERF_TYPE_HARDWARE:
		/* Support sampling of CPU cycles in addition to the
		 * counter facility.  However, the counter facility
		 * is more precise and, hence, restrict this PMU to
		 * sampling events only.
		 */
		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
			return -ENOENT;
		if (!is_sampling_event(event))
			return -ENOENT;
		break;
	default:
		return -ENOENT;
	}

	if (event->cpu >= nr_cpumask_bits ||
	    (event->cpu >= 0 && !cpu_online(event->cpu)))
		return -ENODEV;

	err = __hw_perf_event_init(event);
	if (unlikely(err))
		if (event->destroy)
			event->destroy(event);
	return err;
}

static void cpumsf_pmu_enable(struct pmu *pmu)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
	int err;

	if (cpuhw->flags & PMU_F_ENABLED)
		return;

	if (cpuhw->flags & PMU_F_ERR_MASK)
		return;

	cpuhw->flags |= PMU_F_ENABLED;
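	/* Order the flag update before loading the sampling controls */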
	barrier();

	err = lsctl(&cpuhw->lsctl);
	if (err) {
		cpuhw->flags &= ~PMU_F_ENABLED;
		pr_err("Loading sampling controls failed: op=%i err=%i\n",
		       1, err);
		return;
	}

	debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n",
			    cpuhw->lsctl.es, cpuhw->lsctl.cs,
			    (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
}

static void cpumsf_pmu_disable(struct pmu *pmu)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
	struct hws_lsctl_request_block inactive;
	struct hws_qsi_info_block si;
	int err;

	if (!(cpuhw->flags & PMU_F_ENABLED))
		return;

	if (cpuhw->flags & PMU_F_ERR_MASK)
		return;

	/* Switch off sampling activation control */
	inactive = cpuhw->lsctl;
	inactive.cs = 0;

	err = lsctl(&inactive);
	if (err) {
		pr_err("Loading sampling controls failed: op=%i err=%i\n",
		       2, err);
		return;
	}

	/* Save state of TEAR and DEAR register contents */
	err = qsi(&si);
	if (!err) {
		/* TEAR/DEAR values are valid only if the sampling facility is
		 * enabled.  Note that cpumsf_pmu_disable() might be called even
		 * for a disabled sampling facility because cpumsf_pmu_enable()
		 * controls the enable/disable state.
		 */
		if (si.es) {
			cpuhw->lsctl.tear = si.tear;
			cpuhw->lsctl.dear = si.dear;
		}
	} else
		debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
				    "qsi() failed with err=%i\n", err);

	cpuhw->flags &= ~PMU_F_ENABLED;
}

/* perf_push_sample() - Push samples to perf
 * @event:	The perf event
 * @sample:	Hardware sample data
 *
 * Use the hardware sample data to create a perf event sample.  The sample
 * is then pushed to the perf event subsystem and the function checks for
 * possible event overflows.  If an event overflow occurs, the PMU is
 * stopped.
 *
 * Return non-zero if an event overflow occurred.
 */
static int perf_push_sample(struct perf_event *event,
			    struct hws_data_entry *sample)
{
	int overflow;
	struct pt_regs regs;
	struct perf_sample_data data;

	/* Skip samples that are invalid or for which the instruction address
	 * is not predictable.  For the latter, the wait-state bit is set.
	 */
	if (sample->I || sample->W)
		return 0;

	perf_sample_data_init(&data, 0, event->hw.last_period);

	memset(&regs, 0, sizeof(regs));
	regs.psw.addr = sample->ia;
	if (sample->T)
		regs.psw.mask |= PSW_MASK_DAT;
	if (sample->W)
		regs.psw.mask |= PSW_MASK_WAIT;
	if (sample->P)
		regs.psw.mask |= PSW_MASK_PSTATE;
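	/* Map the sample's address-space (AS) indicator to the PSW
	 * address-space-control bits.
	 */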
	switch (sample->AS) {
	case 0x0:
		regs.psw.mask |= PSW_ASC_PRIMARY;
		break;
	case 0x1:
		regs.psw.mask |= PSW_ASC_ACCREG;
		break;
	case 0x2:
		regs.psw.mask |= PSW_ASC_SECONDARY;
		break;
	case 0x3:
		regs.psw.mask |= PSW_ASC_HOME;
		break;
	}

	overflow = 0;
	if (perf_event_overflow(event, &data, &regs)) {
		overflow = 1;
		event->pmu->stop(event, 0);
		debug_sprintf_event(sfdbg, 4, "perf_push_sample: PMU stopped"
				    " because of an event overflow\n");
	}
	perf_event_update_userpage(event);

	return overflow;
}

static void perf_event_count_update(struct perf_event *event, u64 count)
{
	local64_add(count, &event->count);
}

/* hw_collect_samples() - Walk through a sample-data-block and collect samples
 * @event:	The perf event
 * @sdbt:	Sample-data-block table
 * @overflow:	Event overflow counter
 *
 * Walks through a sample-data-block and collects the hardware sample-data
 * that is pushed to the perf event subsystem.  The overflow counter reports
 * the number of samples that have been discarded due to an event overflow.
 */
static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
			       unsigned long long *overflow)
{
	struct hws_data_entry *sample;
	unsigned long *trailer;

	trailer = trailer_entry_ptr(*sdbt);
	sample = (struct hws_data_entry *) *sdbt;
	while ((unsigned long *) sample < trailer) {
		/* Check for an empty sample */
		if (!sample->def)
			break;

		/* Update perf event period */
		perf_event_count_update(event, SAMPL_RATE(&event->hw));

		/* Check for basic sampling mode */
		if (sample->def == 0x0001) {
			/* If an event overflow occurred, the PMU is stopped to
			 * throttle event delivery.  Remaining sample data is
			 * discarded.
			 */
			if (!*overflow)
				*overflow = perf_push_sample(event, sample);
			else
				/* Count discarded samples */
				*overflow += 1;
		} else
			/* Sample slot is not yet written or other record */
			debug_sprintf_event(sfdbg, 5, "hw_collect_samples: "
					    "Unknown sample data entry format:"
					    " %i\n", sample->def);

		/* Reset sample slot and advance to next sample */
		sample->def = 0;
		sample++;
	}
}

/* hw_perf_event_update() - Process sampling buffer
 * @event:	The perf event
 * @flush_all:	Flag to also flush partially filled sample-data-blocks
 *
 * Processes the sampling buffer and creates perf event samples.
 * The sampling buffer position is retrieved and saved in the TEAR_REG
 * register of the specified perf event.
 *
 * Only full sample-data-blocks are processed.  Specify the flush_all flag
 * to also walk through partially filled sample-data-blocks.
 */
static void hw_perf_event_update(struct perf_event *event, int flush_all)
{
	struct hw_perf_event *hwc = &event->hw;
	struct hws_trailer_entry *te;
	unsigned long *sdbt;
	unsigned long long event_overflow, sampl_overflow;
	int done;

	sdbt = (unsigned long *) TEAR_REG(hwc);
	done = event_overflow = sampl_overflow = 0;
	while (!done) {
		/* Get the trailer entry of the sample-data-block */
		te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);

		/* Leave loop if no more work to do (block full indicator) */
		if (!te->f) {
			done = 1;
			if (!flush_all)
				break;
		}

		/* Check sample overflow count */
		if (te->overflow) {
			/* Increment sample overflow counter */
			sampl_overflow += te->overflow;

			/* XXX: If a sample overflow occurs, increase the
			 * sampling buffer.  Set a "realloc" flag because
			 * the sampler must be re-enabled for changing
			 * the sample-data-block-table content.
			 */
		}

		/* Timestamps are valid for full sample-data-blocks only */
		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
				    "overflow=%llu timestamp=0x%llx\n",
				    sdbt, te->overflow,
				    (te->f) ? te->timestamp : 0ULL);

		/* Collect all samples from a single sample-data-block and
		 * flag if an (perf) event overflow happened.  If so, the PMU
		 * is stopped and remaining samples will be discarded.
		 */
		hw_collect_samples(event, sdbt, &event_overflow);

		/* Reset the trailer entry: clear the sample-overflow count
		 * and the block-full indicator, and re-arm the alert request
		 * (0x40 is the high-order byte of SDB_TE_ALERT_REQ_MASK).
		 */
		xchg(&te->overflow, 0);
		xchg((unsigned char *) te, 0x40);

		/* Advance to next sample-data-block */
		sdbt++;
		if (is_link_entry(sdbt))
			sdbt = get_next_sdbt(sdbt);

		/* Update event hardware registers */
		TEAR_REG(hwc) = (unsigned long) sdbt;

		/* Stop processing sample-data if all samples of the current
		 * sample-data-block were flushed even if it was not full.
		 */
		if (flush_all && done)
			break;

		/* If an event overflow happened, discard samples by
		 * processing any remaining sample-data-blocks.
		 */
		if (event_overflow)
			flush_all = 1;
	}

	if (sampl_overflow || event_overflow)
		debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
				    "overflow stats: sample=%llu event=%llu\n",
				    sampl_overflow, event_overflow);
}

static void cpumsf_pmu_read(struct perf_event *event)
{
	/* Nothing to do ... updates are interrupt-driven */
}

/* Activate sampling control.
 * Next call of pmu_enable() starts sampling.
 */
static void cpumsf_pmu_start(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	perf_pmu_disable(event->pmu);
	event->hw.state = 0;
	cpuhw->lsctl.cs = 1;
	perf_pmu_enable(event->pmu);
}

/* Deactivate sampling control.
 * Next call of pmu_enable() stops sampling.
 */
static void cpumsf_pmu_stop(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

	if (event->hw.state & PERF_HES_STOPPED)
		return;

	perf_pmu_disable(event->pmu);
	cpuhw->lsctl.cs = 0;
	event->hw.state |= PERF_HES_STOPPED;

	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
		hw_perf_event_update(event, 1);
		event->hw.state |= PERF_HES_UPTODATE;
	}
	perf_pmu_enable(event->pmu);
}

static int cpumsf_pmu_add(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
	int err;

	if (cpuhw->flags & PMU_F_IN_USE)
		return -EAGAIN;

	if (!cpuhw->sfb.sdbt)
		return -EINVAL;

	err = 0;
	perf_pmu_disable(event->pmu);

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	/* Set up sampling controls.  Always program the sampling register
	 * using the SDB-table start.  Reset the TEAR_REG event hardware
	 * register that is used by hw_perf_event_update() to store the
	 * sampling buffer position after samples have been flushed.
	 * TEAR points to the SDB-table origin; DEAR points to the first
	 * sample-data-block, i.e. the table's first entry.
	 */
	cpuhw->lsctl.s = 0;
	cpuhw->lsctl.h = 1;
	cpuhw->lsctl.tear = cpuhw->sfb.sdbt;
	cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
	cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
	hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);

	/* Ensure sampling functions are in the disabled state.  If disabled,
	 * switch on sampling enable control. */
	if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) {
		err = -EAGAIN;
		goto out;
	}
	cpuhw->lsctl.es = 1;

	/* Set in_use flag and store event */
	event->hw.idx = 0;	  /* only one sampling event per CPU supported */
	cpuhw->event = event;
	cpuhw->flags |= PMU_F_IN_USE;

	if (flags & PERF_EF_START)
		cpumsf_pmu_start(event, PERF_EF_RELOAD);
out:
	perf_event_update_userpage(event);
	perf_pmu_enable(event->pmu);
	return err;
}

static void cpumsf_pmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

	perf_pmu_disable(event->pmu);
	cpumsf_pmu_stop(event, PERF_EF_UPDATE);

	cpuhw->lsctl.es = 0;
	cpuhw->flags &= ~PMU_F_IN_USE;
	cpuhw->event = NULL;

	perf_event_update_userpage(event);
	perf_pmu_enable(event->pmu);
}

static int cpumsf_pmu_event_idx(struct perf_event *event)
{
	return event->hw.idx;
}

CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);

static struct attribute *cpumsf_pmu_events_attr[] = {
	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
	NULL,
};

PMU_FORMAT_ATTR(event, "config:0-63");

static struct attribute *cpumsf_pmu_format_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group cpumsf_pmu_events_group = {
	.name = "events",
	.attrs = cpumsf_pmu_events_attr,
};
static struct attribute_group cpumsf_pmu_format_group = {
	.name = "format",
	.attrs = cpumsf_pmu_format_attr,
};
static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
	&cpumsf_pmu_events_group,
	&cpumsf_pmu_format_group,
	NULL,
};

static struct pmu cpumf_sampling = {
	.pmu_enable   = cpumsf_pmu_enable,
	.pmu_disable  = cpumsf_pmu_disable,

	.event_init   = cpumsf_pmu_event_init,
	.add	      = cpumsf_pmu_add,
	.del	      = cpumsf_pmu_del,

	.start	      = cpumsf_pmu_start,
	.stop	      = cpumsf_pmu_stop,
	.read	      = cpumsf_pmu_read,

	.event_idx    = cpumsf_pmu_event_idx,
	.attr_groups  = cpumsf_pmu_attr_groups,
};

static void cpumf_measurement_alert(struct ext_code ext_code,
				    unsigned int alert, unsigned long unused)
{
	struct cpu_hw_sf *cpuhw;

	if (!(alert & CPU_MF_INT_SF_MASK))
		return;
	inc_irq_stat(IRQEXT_CMS);
	cpuhw = &__get_cpu_var(cpu_hw_sf);

	/* Measurement alerts are shared and might happen when the PMU
	 * is not reserved.  Ignore these alerts in this case. */
	if (!(cpuhw->flags & PMU_F_RESERVED))
		return;

	/* The processing below must take care of multiple alert events that
	 * might be indicated concurrently. */

	/* Program alert request */
	if (alert & CPU_MF_INT_SF_PRA) {
		if (cpuhw->flags & PMU_F_IN_USE)
			hw_perf_event_update(cpuhw->event, 0);
		else
			WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
	}

	/* Report measurement alerts only for non-PRA codes */
	if (alert != CPU_MF_INT_SF_PRA)
		debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);

	/* Sampling authorization change request */
	if (alert & CPU_MF_INT_SF_SACA)
		qsi(&cpuhw->qsi);

	/* Loss of sample data due to high-priority machine activities */
	if (alert & CPU_MF_INT_SF_LSDA) {
		pr_err("Sample data was lost\n");
		cpuhw->flags |= PMU_F_ERR_LSDA;
		sf_disable();
	}

	/* Invalid sampling buffer entry */
	if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
		pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
		       alert);
		cpuhw->flags |= PMU_F_ERR_IBE;
		sf_disable();
	}
}

static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
					unsigned long action, void *hcpu)
{
	unsigned int cpu = (long) hcpu;
	int flags;

	/* Ignore the notification if no events are scheduled on the PMU.
	 * This might be racy...
	 */
	if (!atomic_read(&num_events))
		return NOTIFY_OK;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		flags = PMC_INIT;
		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
		break;
	case CPU_DOWN_PREPARE:
		flags = PMC_RELEASE;
		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static int __init init_cpum_sampling_pmu(void)
{
	int err;

	if (!cpum_sf_avail())
		return -ENODEV;

	sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
	if (!sfdbg)
		pr_err("Registering for s390dbf failed\n");
	debug_register_view(sfdbg, &debug_sprintf_view);

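	/* 0x1407 is the external-interrupt code for measurement alerts */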
	err = register_external_interrupt(0x1407, cpumf_measurement_alert);
	if (err) {
		pr_err("Failed to register for CPU-measurement alerts\n");
		goto out;
	}

	err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
	if (err) {
		pr_err("Failed to register cpum_sf pmu\n");
		unregister_external_interrupt(0x1407, cpumf_measurement_alert);
		goto out;
	}
	perf_cpu_notifier(cpumf_pmu_notifier);
out:
	return err;
}
arch_initcall(init_cpum_sampling_pmu);