1 /*
2 * Hypervisor supplied "24x7" performance counter support
3 *
4 * Author: Cody P Schafer <cody@linux.vnet.ibm.com>
5 * Copyright 2014 IBM Corporation.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13 #define pr_fmt(fmt) "hv-24x7: " fmt
14
15 #include <linux/perf_event.h>
16 #include <linux/rbtree.h>
17 #include <linux/module.h>
18 #include <linux/slab.h>
19 #include <linux/vmalloc.h>
20
21 #include <asm/firmware.h>
22 #include <asm/hvcall.h>
23 #include <asm/io.h>
24 #include <linux/byteorder/generic.h>
25
26 #include "hv-24x7.h"
27 #include "hv-24x7-catalog.h"
28 #include "hv-common.h"
29
30 static const char *event_domain_suffix(unsigned domain)
31 {
32 switch (domain) {
33 #define DOMAIN(n, v, x, c) \
34 case HV_PERF_DOMAIN_##n: \
35 return "__" #n;
36 #include "hv-24x7-domains.h"
37 #undef DOMAIN
38 default:
39 WARN(1, "unknown domain %d\n", domain);
40 return "__UNKNOWN_DOMAIN_SUFFIX";
41 }
42 }
43
44 static bool domain_is_valid(unsigned domain)
45 {
46 switch (domain) {
47 #define DOMAIN(n, v, x, c) \
48 case HV_PERF_DOMAIN_##n: \
49 /* fall through */
50 #include "hv-24x7-domains.h"
51 #undef DOMAIN
52 return true;
53 default:
54 return false;
55 }
56 }
57
58 static bool is_physical_domain(unsigned domain)
59 {
60 switch (domain) {
61 #define DOMAIN(n, v, x, c) \
62 case HV_PERF_DOMAIN_##n: \
63 return c;
64 #include "hv-24x7-domains.h"
65 #undef DOMAIN
66 default:
67 return false;
68 }
69 }
70
71 static bool catalog_entry_domain_is_valid(unsigned domain)
72 {
73 return is_physical_domain(domain);
74 }
75
76 /*
77 * TODO: Merging events:
78 * - Think of the hcall as an interface to a 4d array of counters:
79 * - x = domains
80 * - y = indexes in the domain (core, chip, vcpu, node, etc)
81 * - z = offset into the counter space
82 * - w = lpars (guest vms, "logical partitions")
83 * - A single request is: x,y,y_last,z,z_last,w,w_last
84 * - this means we can retrieve a rectangle of counters in y,z for a single x.
85 *
86 * - Things to consider (ignoring w):
87 * - input cost_per_request = 16
88 * - output cost_per_result(ys,zs) = 8 + 8 * ys + ys * zs
89 * - limited number of requests per hcall (must fit into 4K bytes)
90 * - 16 [buffer header] + 16 [request size] * request_count <= 4k
91 * - 255 requests per hcall
92 * - sometimes it will be more efficient to read extra data and discard
93 */
94
95 /*
96 * Example usage:
97 * perf stat -e 'hv_24x7/domain=2,offset=8,vcpu=0,lpar=0xffffffff/'
98 */
99
100 /* u3 0-6, one of HV_24X7_PERF_DOMAIN */
101 EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3);
102 /* u16 */
103 EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31);
104 EVENT_DEFINE_RANGE_FORMAT(chip, config, 16, 31);
105 EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31);
106 /* u32, see "data_offset" */
107 EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63);
108 /* u16 */
109 EVENT_DEFINE_RANGE_FORMAT(lpar, config1, 0, 15);
110
111 EVENT_DEFINE_RANGE(reserved1, config, 4, 15);
112 EVENT_DEFINE_RANGE(reserved2, config1, 16, 63);
113 EVENT_DEFINE_RANGE(reserved3, config2, 0, 63);
114
115 static struct attribute *format_attrs[] = {
116 &format_attr_domain.attr,
117 &format_attr_offset.attr,
118 &format_attr_core.attr,
119 &format_attr_chip.attr,
120 &format_attr_vcpu.attr,
121 &format_attr_lpar.attr,
122 NULL,
123 };
124
125 static struct attribute_group format_group = {
126 .name = "format",
127 .attrs = format_attrs,
128 };
129
130 static struct attribute_group event_group = {
131 .name = "events",
132 /* .attrs is set in init */
133 };
134
135 static struct attribute_group event_desc_group = {
136 .name = "event_descs",
137 /* .attrs is set in init */
138 };
139
140 static struct attribute_group event_long_desc_group = {
141 .name = "event_long_descs",
142 /* .attrs is set in init */
143 };
144
145 static struct kmem_cache *hv_page_cache;
146
147 DEFINE_PER_CPU(int, hv_24x7_txn_flags);
148 DEFINE_PER_CPU(int, hv_24x7_txn_err);
149
150 struct hv_24x7_hw {
151 struct perf_event *events[255];
152 };
153
154 DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
155
156 /*
157 * request_buffer and result_buffer are not required to be 4k aligned,
158 * but are not allowed to cross any 4k boundary. Aligning them to 4k is
159 * the simplest way to ensure that.
160 */
161 #define H24x7_DATA_BUFFER_SIZE 4096
162 DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
163 DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
164
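/*
 * The name, description, and long description strings are packed back to
 * back after the fixed portion of the event entry.  Each string is a
 * 2-byte big-endian length (which counts the length field itself)
 * followed by the string bytes, hence the "- 2" adjustments below.  The
 * name's length lives in the fixed portion (event_name_len); remainder[]
 * starts with the name bytes themselves.
 */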
165 static char *event_name(struct hv_24x7_event_data *ev, int *len)
166 {
167 *len = be16_to_cpu(ev->event_name_len) - 2;
168 return (char *)ev->remainder;
169 }
170
171 static char *event_desc(struct hv_24x7_event_data *ev, int *len)
172 {
173 unsigned nl = be16_to_cpu(ev->event_name_len);
174 __be16 *desc_len = (__be16 *)(ev->remainder + nl - 2);
175
176 *len = be16_to_cpu(*desc_len) - 2;
177 return (char *)ev->remainder + nl;
178 }
179
180 static char *event_long_desc(struct hv_24x7_event_data *ev, int *len)
181 {
182 unsigned nl = be16_to_cpu(ev->event_name_len);
183 __be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2);
184 unsigned desc_len = be16_to_cpu(*desc_len_);
185 __be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2);
186
187 *len = be16_to_cpu(*long_desc_len) - 2;
188 return (char *)ev->remainder + nl + desc_len;
189 }
190
191 static bool event_fixed_portion_is_within(struct hv_24x7_event_data *ev,
192 void *end)
193 {
194 void *start = ev;
195
196 return (start + offsetof(struct hv_24x7_event_data, remainder)) < end;
197 }
198
199 /*
200 * Things we don't check:
201 * - padding for desc, name, and long/detailed desc is required to be '\0'
202 * bytes.
203 *
204 * Return NULL if we pass end,
205 * Otherwise return the address of the byte just following the event.
206 */
207 static void *event_end(struct hv_24x7_event_data *ev, void *end)
208 {
209 void *start = ev;
210 __be16 *dl_, *ldl_;
211 unsigned dl, ldl;
212 unsigned nl = be16_to_cpu(ev->event_name_len);
213
214 if (nl < 2) {
215 pr_debug("%s: name length too short: %d", __func__, nl);
216 return NULL;
217 }
218
219 if (start + nl > end) {
220 pr_debug("%s: start=%p + nl=%u > end=%p",
221 __func__, start, nl, end);
222 return NULL;
223 }
224
225 dl_ = (__be16 *)(ev->remainder + nl - 2);
226 if (!IS_ALIGNED((uintptr_t)dl_, 2))
227 pr_warn("desc len not aligned %p", dl_);
228 dl = be16_to_cpu(*dl_);
229 if (dl < 2) {
230 pr_debug("%s: desc len too short: %d", __func__, dl);
231 return NULL;
232 }
233
234 if (start + nl + dl > end) {
235 pr_debug("%s: (start=%p + nl=%u + dl=%u)=%p > end=%p",
236 __func__, start, nl, dl, start + nl + dl, end);
237 return NULL;
238 }
239
240 ldl_ = (__be16 *)(ev->remainder + nl + dl - 2);
241 if (!IS_ALIGNED((uintptr_t)ldl_, 2))
242 pr_warn("long desc len not aligned %p", ldl_);
243 ldl = be16_to_cpu(*ldl_);
244 if (ldl < 2) {
245 pr_debug("%s: long desc len too short (ldl=%u)",
246 __func__, ldl);
247 return NULL;
248 }
249
250 if (start + nl + dl + ldl > end) {
251 pr_debug("%s: start=%p + nl=%u + dl=%u + ldl=%u > end=%p",
252 __func__, start, nl, dl, ldl, end);
253 return NULL;
254 }
255
256 return start + nl + dl + ldl;
257 }
258
259 static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
260 unsigned long version,
261 unsigned long index)
262 {
263 pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
264 phys_4096, version, index);
265
266 WARN_ON(!IS_ALIGNED(phys_4096, 4096));
267
268 return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE,
269 phys_4096, version, index);
270 }
271
272 static unsigned long h_get_24x7_catalog_page(char page[],
273 u64 version, u32 index)
274 {
275 return h_get_24x7_catalog_page_(virt_to_phys(page),
276 version, index);
277 }
278
279 static unsigned core_domains[] = {
280 HV_PERF_DOMAIN_PHYS_CORE,
281 HV_PERF_DOMAIN_VCPU_HOME_CORE,
282 HV_PERF_DOMAIN_VCPU_HOME_CHIP,
283 HV_PERF_DOMAIN_VCPU_HOME_NODE,
284 HV_PERF_DOMAIN_VCPU_REMOTE_NODE,
285 };
286 /* chip event data always yields a single event, core yields multiple */
287 #define MAX_EVENTS_PER_EVENT_DATA ARRAY_SIZE(core_domains)
288
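/*
 * Build the sysfs "events" attribute value for one catalog event in the
 * given domain, e.g. "domain=0x2,offset=0x8,core=?,lpar=0x0".  The user
 * supplies the "?" fields on the perf command line (see the example near
 * the top of this file).
 */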
289 static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain)
290 {
291 const char *sindex;
292 const char *lpar;
293
294 switch (domain) {
295 case HV_PERF_DOMAIN_PHYS_CHIP:
296 lpar = "0x0";
297 sindex = "chip";
298 break;
299 case HV_PERF_DOMAIN_PHYS_CORE:
300 lpar = "0x0";
301 sindex = "core";
302 break;
303 default:
304 lpar = "?";
305 sindex = "vcpu";
306 }
307
308 return kasprintf(GFP_KERNEL,
309 "domain=0x%x,offset=0x%x,%s=?,lpar=%s",
310 domain,
311 be16_to_cpu(event->event_counter_offs) +
312 be16_to_cpu(event->event_group_record_offs),
313 sindex,
314 lpar);
315 }
316
317 /* Avoid trusting fw to NUL terminate strings */
318 static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp)
319 {
320 return kasprintf(gfp, "%.*s", max_len, maybe_str);
321 }
322
323 static ssize_t device_show_string(struct device *dev,
324 struct device_attribute *attr, char *buf)
325 {
326 struct dev_ext_attribute *d;
327
328 d = container_of(attr, struct dev_ext_attribute, attr);
329
330 return sprintf(buf, "%s\n", (char *)d->var);
331 }
332
333 static struct attribute *device_str_attr_create_(char *name, char *str)
334 {
335 struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL);
336
337 if (!attr)
338 return NULL;
339
340 sysfs_attr_init(&attr->attr.attr);
341
342 attr->var = str;
343 attr->attr.attr.name = name;
344 attr->attr.attr.mode = 0444;
345 attr->attr.show = device_show_string;
346
347 return &attr->attr.attr;
348 }
349
350 static struct attribute *device_str_attr_create(char *name, int name_max,
351 int name_nonce,
352 char *str, size_t str_max)
353 {
354 char *n;
355 char *s = memdup_to_str(str, str_max, GFP_KERNEL);
356 struct attribute *a;
357
358 if (!s)
359 return NULL;
360
361 if (!name_nonce)
362 n = kasprintf(GFP_KERNEL, "%.*s", name_max, name);
363 else
364 n = kasprintf(GFP_KERNEL, "%.*s__%d", name_max, name,
365 name_nonce);
366 if (!n)
367 goto out_s;
368
369 a = device_str_attr_create_(n, s);
370 if (!a)
371 goto out_n;
372
373 return a;
374 out_n:
375 kfree(n);
376 out_s:
377 kfree(s);
378 return NULL;
379 }
380
381 static void device_str_attr_destroy(struct attribute *attr)
382 {
383 struct dev_ext_attribute *d;
384
385 d = container_of(attr, struct dev_ext_attribute, attr.attr);
386 kfree(d->var);
387 kfree(d->attr.attr.name);
388 kfree(d);
389 }
390
391 static struct attribute *event_to_attr(unsigned ix,
392 struct hv_24x7_event_data *event,
393 unsigned domain,
394 int nonce)
395 {
396 int event_name_len;
397 char *ev_name, *a_ev_name, *val;
398 const char *ev_suffix;
399 struct attribute *attr;
400
401 if (!domain_is_valid(domain)) {
402 pr_warn("catalog event %u has invalid domain %u\n",
403 ix, domain);
404 return NULL;
405 }
406
407 val = event_fmt(event, domain);
408 if (!val)
409 return NULL;
410
411 ev_suffix = event_domain_suffix(domain);
412 ev_name = event_name(event, &event_name_len);
413 if (!nonce)
414 a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s",
415 (int)event_name_len, ev_name, ev_suffix);
416 else
417 a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d",
418 (int)event_name_len, ev_name, ev_suffix, nonce);
419
420 if (!a_ev_name)
421 goto out_val;
422
423 attr = device_str_attr_create_(a_ev_name, val);
424 if (!attr)
425 goto out_name;
426
427 return attr;
428 out_name:
429 kfree(a_ev_name);
430 out_val:
431 kfree(val);
432 return NULL;
433 }
434
435 static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event,
436 int nonce)
437 {
438 int nl, dl;
439 char *name = event_name(event, &nl);
440 char *desc = event_desc(event, &dl);
441
442 /* If there isn't a description, don't create the sysfs file */
443 if (!dl)
444 return NULL;
445
446 return device_str_attr_create(name, nl, nonce, desc, dl);
447 }
448
449 static struct attribute *
450 event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
451 {
452 int nl, dl;
453 char *name = event_name(event, &nl);
454 char *desc = event_long_desc(event, &dl);
455
456 /* If there isn't a description, don't create the sysfs file */
457 if (!dl)
458 return NULL;
459
460 return device_str_attr_create(name, nl, nonce, desc, dl);
461 }
462
463 static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs,
464 struct hv_24x7_event_data *event, int nonce)
465 {
466 unsigned i;
467
468 switch (event->domain) {
469 case HV_PERF_DOMAIN_PHYS_CHIP:
470 *attrs = event_to_attr(ix, event, event->domain, nonce);
471 return 1;
472 case HV_PERF_DOMAIN_PHYS_CORE:
473 for (i = 0; i < ARRAY_SIZE(core_domains); i++) {
474 attrs[i] = event_to_attr(ix, event, core_domains[i],
475 nonce);
476 if (!attrs[i]) {
477 pr_warn("catalog event %u: individual attr %u "
478 "creation failure\n", ix, i);
479 for (; i; i--)
480 device_str_attr_destroy(attrs[i - 1]);
481 return -1;
482 }
483 }
484 return i;
485 default:
486 pr_warn("catalog event %u: domain %u is not allowed in the "
487 "catalog\n", ix, event->domain);
488 return -1;
489 }
490 }
491
492 static size_t event_to_attr_ct(struct hv_24x7_event_data *event)
493 {
494 switch (event->domain) {
495 case HV_PERF_DOMAIN_PHYS_CHIP:
496 return 1;
497 case HV_PERF_DOMAIN_PHYS_CORE:
498 return ARRAY_SIZE(core_domains);
499 default:
500 return 0;
501 }
502 }
503
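/*
 * vmalloc memory is only virtually contiguous; the catalog hcall needs a
 * physical address for each 4k chunk, so translate one page at a time.
 */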
504 static unsigned long vmalloc_to_phys(void *v)
505 {
506 struct page *p = vmalloc_to_page(v);
507
508 BUG_ON(!p);
509 return page_to_phys(p) + offset_in_page(v);
510 }
511
512 /* One node per unique (name, domain) pair seen in the catalog. */
513 struct event_uniq {
514 struct rb_node node;
515 const char *name;
516 int nl;
517 unsigned ct;
518 unsigned domain;
519 };
520
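/*
 * Three-way compare of two byte ranges for the rbtree below: ranges of
 * different length never compare equal, and equal-length ranges are
 * ordered by memcmp().
 */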
521 static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
522 {
523 if (s1 < s2)
524 return 1;
525 if (s1 > s2)
526 return -1;
527
528 return memcmp(d1, d2, s1);
529 }
530
531 static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2,
532 size_t s2, unsigned d2)
533 {
534 int r = memord(v1, s1, v2, s2);
535
536 if (r)
537 return r;
538 if (d1 > d2)
539 return 1;
540 if (d2 > d1)
541 return -1;
542 return 0;
543 }
544
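/*
 * Track (name, domain) pairs already seen in the catalog.  Returns 0 the
 * first time a pair is seen, the number of prior occurrences (the "nonce"
 * used to uniquify sysfs attribute names) for duplicates, or -ENOMEM.
 */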
545 static int event_uniq_add(struct rb_root *root, const char *name, int nl,
546 unsigned domain)
547 {
548 struct rb_node **new = &(root->rb_node), *parent = NULL;
549 struct event_uniq *data;
550
551 /* Figure out where to put new node */
552 while (*new) {
553 struct event_uniq *it;
554 int result;
555
556 it = container_of(*new, struct event_uniq, node);
557 result = ev_uniq_ord(name, nl, domain, it->name, it->nl,
558 it->domain);
559
560 parent = *new;
561 if (result < 0)
562 new = &((*new)->rb_left);
563 else if (result > 0)
564 new = &((*new)->rb_right);
565 else {
566 it->ct++;
567 pr_info("found a duplicate event %.*s, ct=%u\n", nl,
568 name, it->ct);
569 return it->ct;
570 }
571 }
572
573 data = kmalloc(sizeof(*data), GFP_KERNEL);
574 if (!data)
575 return -ENOMEM;
576
577 *data = (struct event_uniq) {
578 .name = name,
579 .nl = nl,
580 .ct = 0,
581 .domain = domain,
582 };
583
584 /* Add new node and rebalance tree. */
585 rb_link_node(&data->node, parent, new);
586 rb_insert_color(&data->node, root);
587
588 /* data->ct */
589 return 0;
590 }
591
592 static void event_uniq_destroy(struct rb_root *root)
593 {
594 /*
595 * the strings we point to are in the giant block of memory filled by
596 * the catalog, and are freed separately.
597 */
598 struct event_uniq *pos, *n;
599
600 rbtree_postorder_for_each_entry_safe(pos, n, root, node)
601 kfree(pos);
602 }
603
604
605 /*
606 * ensure the event structure's sizes are self consistent and don't cause us to
607 * read outside of the event
608 *
609 * On success, return the event length in bytes.
610 * Otherwise, return -1 (and print as appropriate).
611 */
612 static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
613 size_t event_idx,
614 size_t event_data_bytes,
615 size_t event_entry_count,
616 size_t offset, void *end)
617 {
618 ssize_t ev_len;
619 void *ev_end, *calc_ev_end;
620
621 if (offset >= event_data_bytes)
622 return -1;
623
624 if (event_idx >= event_entry_count) {
625 pr_devel("catalog event data has %zu bytes of padding after last event\n",
626 event_data_bytes - offset);
627 return -1;
628 }
629
630 if (!event_fixed_portion_is_within(event, end)) {
631 pr_warn("event %zu fixed portion is not within range\n",
632 event_idx);
633 return -1;
634 }
635
636 ev_len = be16_to_cpu(event->length);
637
638 if (ev_len % 16)
639 pr_info("event %zu has length %zu not divisible by 16: event=%pK\n",
640 event_idx, ev_len, event);
641
642 ev_end = (__u8 *)event + ev_len;
643 if (ev_end > end) {
644 pr_warn("event %zu has .length=%zu, ends after buffer end: ev_end=%pK > end=%pK, offset=%zu\n",
645 event_idx, ev_len, ev_end, end,
646 offset);
647 return -1;
648 }
649
650 calc_ev_end = event_end(event, end);
651 if (!calc_ev_end) {
652 pr_warn("event %zu has a calculated length which exceeds buffer length %zu: event=%pK end=%pK, offset=%zu\n",
653 event_idx, event_data_bytes, event, end,
654 offset);
655 return -1;
656 }
657
658 if (calc_ev_end > ev_end) {
659 pr_warn("event %zu exceeds it's own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n",
660 event_idx, event, ev_end, offset, calc_ev_end);
661 return -1;
662 }
663
664 return ev_len;
665 }
666
667 #define MAX_4K (SIZE_MAX / 4096)
668
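/*
 * Fetch the 24x7 catalog from the hypervisor, validate each event entry,
 * and build the NULL-terminated sysfs attribute arrays for event names,
 * descriptions, and long descriptions.
 */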
669 static int create_events_from_catalog(struct attribute ***events_,
670 struct attribute ***event_descs_,
671 struct attribute ***event_long_descs_)
672 {
673 unsigned long hret;
674 size_t catalog_len, catalog_page_len, event_entry_count,
675 event_data_len, event_data_offs,
676 event_data_bytes, junk_events, event_idx, event_attr_ct, i,
677 attr_max, event_idx_last, desc_ct, long_desc_ct;
678 ssize_t ct, ev_len;
679 uint32_t catalog_version_num;
680 struct attribute **events, **event_descs, **event_long_descs;
681 struct hv_24x7_catalog_page_0 *page_0 =
682 kmem_cache_alloc(hv_page_cache, GFP_KERNEL);
683 void *page = page_0;
684 void *event_data, *end;
685 struct hv_24x7_event_data *event;
686 struct rb_root ev_uniq = RB_ROOT;
687 int ret = 0;
688
689 if (!page) {
690 ret = -ENOMEM;
691 goto e_out;
692 }
693
694 hret = h_get_24x7_catalog_page(page, 0, 0);
695 if (hret) {
696 ret = -EIO;
697 goto e_free;
698 }
699
700 catalog_version_num = be64_to_cpu(page_0->version);
701 catalog_page_len = be32_to_cpu(page_0->length);
702
703 if (MAX_4K < catalog_page_len) {
704 pr_err("invalid page count: %zu\n", catalog_page_len);
705 ret = -EIO;
706 goto e_free;
707 }
708
709 catalog_len = catalog_page_len * 4096;
710
711 event_entry_count = be16_to_cpu(page_0->event_entry_count);
712 event_data_offs = be16_to_cpu(page_0->event_data_offs);
713 event_data_len = be16_to_cpu(page_0->event_data_len);
714
715 pr_devel("cv %zu cl %zu eec %zu edo %zu edl %zu\n",
716 (size_t)catalog_version_num, catalog_len,
717 event_entry_count, event_data_offs, event_data_len);
718
719 if ((MAX_4K < event_data_len)
720 || (MAX_4K < event_data_offs)
721 || (MAX_4K - event_data_offs < event_data_len)) {
722 pr_err("invalid event data offs %zu and/or len %zu\n",
723 event_data_offs, event_data_len);
724 ret = -EIO;
725 goto e_free;
726 }
727
728 if ((event_data_offs + event_data_len) > catalog_page_len) {
729 pr_err("event data %zu-%zu does not fit inside catalog 0-%zu\n",
730 event_data_offs,
731 event_data_offs + event_data_len,
732 catalog_page_len);
733 ret = -EIO;
734 goto e_free;
735 }
736
737 if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) {
738 pr_err("event_entry_count %zu is invalid\n",
739 event_entry_count);
740 ret = -EIO;
741 goto e_free;
742 }
743
744 event_data_bytes = event_data_len * 4096;
745
746 /*
747 * Event data can span several pages, and individual events can cross
748 * page boundaries. Use vmalloc so the whole range is virtually contiguous.
749 */
750 event_data = vmalloc(event_data_bytes);
751 if (!event_data) {
752 pr_err("could not allocate event data\n");
753 ret = -ENOMEM;
754 goto e_free;
755 }
756
757 end = event_data + event_data_bytes;
758
759 /*
760 * using vmalloc_to_phys() like this only works if PAGE_SIZE is
761 * divisible by 4096
762 */
763 BUILD_BUG_ON(PAGE_SIZE % 4096);
764
765 for (i = 0; i < event_data_len; i++) {
766 hret = h_get_24x7_catalog_page_(
767 vmalloc_to_phys(event_data + i * 4096),
768 catalog_version_num,
769 i + event_data_offs);
770 if (hret) {
771 pr_err("failed to get event data in page %zu\n",
772 i + event_data_offs);
773 ret = -EIO;
774 goto e_event_data;
775 }
776 }
777
778 /*
779 * scan the catalog to determine the number of attributes we need, and
780 * verify it at the same time.
781 */
782 for (junk_events = 0, event = event_data, event_idx = 0, attr_max = 0;
783 ;
784 event_idx++, event = (void *)event + ev_len) {
785 size_t offset = (void *)event - (void *)event_data;
786 char *name;
787 int nl;
788
789 ev_len = catalog_event_len_validate(event, event_idx,
790 event_data_bytes,
791 event_entry_count,
792 offset, end);
793 if (ev_len < 0)
794 break;
795
796 name = event_name(event, &nl);
797
798 if (event->event_group_record_len == 0) {
799 pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
800 event_idx, nl, name);
801 junk_events++;
802 continue;
803 }
804
805 if (!catalog_entry_domain_is_valid(event->domain)) {
806 pr_info("event %zu (%.*s) has invalid domain %d\n",
807 event_idx, nl, name, event->domain);
808 junk_events++;
809 continue;
810 }
811
812 attr_max += event_to_attr_ct(event);
813 }
814
815 event_idx_last = event_idx;
816 if (event_idx_last != event_entry_count)
817 pr_warn("event buffer ended before listed # of events were parsed (got %zu, wanted %zu, junk %zu)\n",
818 event_idx_last, event_entry_count, junk_events);
819
820 events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL);
821 if (!events) {
822 ret = -ENOMEM;
823 goto e_event_data;
824 }
825
826 event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs),
827 GFP_KERNEL);
828 if (!event_descs) {
829 ret = -ENOMEM;
830 goto e_event_attrs;
831 }
832
833 event_long_descs = kmalloc_array(event_idx + 1,
834 sizeof(*event_long_descs), GFP_KERNEL);
835 if (!event_long_descs) {
836 ret = -ENOMEM;
837 goto e_event_descs;
838 }
839
840 /* Iterate over the catalog filling in the attribute vector */
841 for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0,
842 event = event_data, event_idx = 0;
843 event_idx < event_idx_last;
844 event_idx++, ev_len = be16_to_cpu(event->length),
845 event = (void *)event + ev_len) {
846 char *name;
847 int nl;
848 int nonce;
849 /*
850 * These are the only "bad" events that are intermixed and that
851 * we can ignore without issue. Make sure to skip them here.
852 */
853 if (event->event_group_record_len == 0)
854 continue;
855 if (!catalog_entry_domain_is_valid(event->domain))
856 continue;
857
858 name = event_name(event, &nl);
859 nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
860 ct = event_data_to_attrs(event_idx, events + event_attr_ct,
861 event, nonce);
862 if (ct <= 0) {
863 pr_warn("event %zu (%.*s) creation failure, skipping\n",
864 event_idx, nl, name);
865 junk_events++;
866 } else {
867 event_attr_ct += ct;
868 event_descs[desc_ct] = event_to_desc_attr(event, nonce);
869 if (event_descs[desc_ct])
870 desc_ct++;
871 event_long_descs[long_desc_ct] =
872 event_to_long_desc_attr(event, nonce);
873 if (event_long_descs[long_desc_ct])
874 long_desc_ct++;
875 }
876 }
877
878 pr_info("read %zu catalog entries, created %zu event attrs (%zu failures), %zu descs\n",
879 event_idx, event_attr_ct, junk_events, desc_ct);
880
881 events[event_attr_ct] = NULL;
882 event_descs[desc_ct] = NULL;
883 event_long_descs[long_desc_ct] = NULL;
884
885 event_uniq_destroy(&ev_uniq);
886 vfree(event_data);
887 kmem_cache_free(hv_page_cache, page);
888
889 *events_ = events;
890 *event_descs_ = event_descs;
891 *event_long_descs_ = event_long_descs;
892 return 0;
893
894 e_event_descs:
895 kfree(event_descs);
896 e_event_attrs:
897 kfree(events);
898 e_event_data:
899 vfree(event_data);
900 e_free:
901 kmem_cache_free(hv_page_cache, page);
902 e_out:
903 *events_ = NULL;
904 *event_descs_ = NULL;
905 *event_long_descs_ = NULL;
906 return ret;
907 }
908
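/*
 * sysfs read handler for the "catalog" binary attribute: re-fetch the
 * 4k catalog page containing @offset from the hypervisor and copy at
 * most the remainder of that page into @buf.
 */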
909 static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
910 struct bin_attribute *bin_attr, char *buf,
911 loff_t offset, size_t count)
912 {
913 unsigned long hret;
914 ssize_t ret = 0;
915 size_t catalog_len = 0, catalog_page_len = 0;
916 loff_t page_offset = 0;
917 loff_t offset_in_page;
918 size_t copy_len;
919 uint64_t catalog_version_num = 0;
920 void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);
921 struct hv_24x7_catalog_page_0 *page_0 = page;
922
923 if (!page)
924 return -ENOMEM;
925
926 hret = h_get_24x7_catalog_page(page, 0, 0);
927 if (hret) {
928 ret = -EIO;
929 goto e_free;
930 }
931
932 catalog_version_num = be64_to_cpu(page_0->version);
933 catalog_page_len = be32_to_cpu(page_0->length);
934 catalog_len = catalog_page_len * 4096;
935
936 page_offset = offset / 4096;
937 offset_in_page = offset % 4096;
938
939 if (page_offset >= catalog_page_len)
940 goto e_free;
941
942 if (page_offset != 0) {
943 hret = h_get_24x7_catalog_page(page, catalog_version_num,
944 page_offset);
945 if (hret) {
946 ret = -EIO;
947 goto e_free;
948 }
949 }
950
951 copy_len = 4096 - offset_in_page;
952 if (copy_len > count)
953 copy_len = count;
954
955 memcpy(buf, page+offset_in_page, copy_len);
956 ret = copy_len;
957
958 e_free:
959 if (hret)
960 pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:"
961 " rc=%ld\n",
962 catalog_version_num, page_offset, hret);
963 kmem_cache_free(hv_page_cache, page);
964
965 pr_devel("catalog_read: offset=%lld(%lld) count=%zu "
966 "catalog_len=%zu(%zu) => %zd\n", offset, page_offset,
967 count, catalog_len, catalog_page_len, ret);
968
969 return ret;
970 }
971
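/*
 * Generate a read-only device attribute whose show() routine fetches
 * catalog page 0 and prints a single field from it.
 */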
972 #define PAGE_0_ATTR(_name, _fmt, _expr) \
973 static ssize_t _name##_show(struct device *dev, \
974 struct device_attribute *dev_attr, \
975 char *buf) \
976 { \
977 unsigned long hret; \
978 ssize_t ret = 0; \
979 void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); \
980 struct hv_24x7_catalog_page_0 *page_0 = page; \
981 if (!page) \
982 return -ENOMEM; \
983 hret = h_get_24x7_catalog_page(page, 0, 0); \
984 if (hret) { \
985 ret = -EIO; \
986 goto e_free; \
987 } \
988 ret = sprintf(buf, _fmt, _expr); \
989 e_free: \
990 kmem_cache_free(hv_page_cache, page); \
991 return ret; \
992 } \
993 static DEVICE_ATTR_RO(_name)
994
995 PAGE_0_ATTR(catalog_version, "%lld\n",
996 (unsigned long long)be64_to_cpu(page_0->version));
997 PAGE_0_ATTR(catalog_len, "%lld\n",
998 (unsigned long long)be32_to_cpu(page_0->length) * 4096);
999 static BIN_ATTR_RO(catalog, 0/* real length varies */);
1000
1001 static struct bin_attribute *if_bin_attrs[] = {
1002 &bin_attr_catalog,
1003 NULL,
1004 };
1005
1006 static struct attribute *if_attrs[] = {
1007 &dev_attr_catalog_len.attr,
1008 &dev_attr_catalog_version.attr,
1009 NULL,
1010 };
1011
1012 static struct attribute_group if_group = {
1013 .name = "interface",
1014 .bin_attrs = if_bin_attrs,
1015 .attrs = if_attrs,
1016 };
1017
1018 static const struct attribute_group *attr_groups[] = {
1019 &format_group,
1020 &event_group,
1021 &event_desc_group,
1022 &event_long_desc_group,
1023 &if_group,
1024 NULL,
1025 };
1026
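/*
 * Rate-limited dump of the first queued request plus the failure details
 * returned by the hypervisor, used when an H_GET_24X7_DATA hcall fails.
 */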
1027 static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer,
1028 struct hv_24x7_data_result_buffer *result_buffer,
1029 unsigned long ret)
1030 {
1031 struct hv_24x7_request *req;
1032
1033 req = &request_buffer->requests[0];
1034 pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => "
1035 "ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
1036 req->performance_domain, req->data_offset,
1037 req->starting_ix, req->starting_lpar_ix, ret, ret,
1038 result_buffer->detailed_rc,
1039 result_buffer->failing_request_ix);
1040 }
1041
1042 /*
1043 * Start the process for a new H_GET_24x7_DATA hcall.
1044 */
1045 static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
1046 struct hv_24x7_data_result_buffer *result_buffer)
1047 {
1048
1049 memset(request_buffer, 0, 4096);
1050 memset(result_buffer, 0, 4096);
1051
1052 request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT;
1053 /* memset above set request_buffer->num_requests to 0 */
1054 }
1055
1056 /*
1057 * Commit (i.e. perform) the H_GET_24x7_DATA hcall using the data collected
1058 * by 'init_24x7_request()' and 'add_event_to_24x7_request()'.
1059 */
1060 static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
1061 struct hv_24x7_data_result_buffer *result_buffer)
1062 {
1063 unsigned long ret;
1064
1065 /*
1066 * NOTE: Due to variable number of array elements in request and
1067 * result buffer(s), sizeof() is not reliable. Use the actual
1068 * allocated buffer size, H24x7_DATA_BUFFER_SIZE.
1069 */
1070 ret = plpar_hcall_norets(H_GET_24X7_DATA,
1071 virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE,
1072 virt_to_phys(result_buffer), H24x7_DATA_BUFFER_SIZE);
1073
1074 if (ret)
1075 log_24x7_hcall(request_buffer, result_buffer, ret);
1076
1077 return ret;
1078 }
1079
1080 /*
1081 * Add the given @event to the next slot in the 24x7 request_buffer.
1082 *
1083 * Note that H_GET_24X7_DATA hcall allows reading several counters'
1084 * values in a single HCALL. We expect the caller to add events to the
1085 * request buffer one by one, make the HCALL and process the results.
1086 */
1087 static int add_event_to_24x7_request(struct perf_event *event,
1088 struct hv_24x7_request_buffer *request_buffer)
1089 {
1090 u16 idx;
1091 int i;
1092 struct hv_24x7_request *req;
1093
1094 if (request_buffer->num_requests > 254) {
1095 pr_devel("Too many requests for 24x7 HCALL %d\n",
1096 request_buffer->num_requests);
1097 return -EINVAL;
1098 }
1099
1100 switch (event_get_domain(event)) {
1101 case HV_PERF_DOMAIN_PHYS_CHIP:
1102 idx = event_get_chip(event);
1103 break;
1104 case HV_PERF_DOMAIN_PHYS_CORE:
1105 idx = event_get_core(event);
1106 break;
1107 default:
1108 idx = event_get_vcpu(event);
1109 }
1110
1111 i = request_buffer->num_requests++;
1112 req = &request_buffer->requests[i];
1113
1114 req->performance_domain = event_get_domain(event);
1115 req->data_size = cpu_to_be16(8);
1116 req->data_offset = cpu_to_be32(event_get_offset(event));
1117 req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event));
1118 req->max_num_lpars = cpu_to_be16(1);
1119 req->starting_ix = cpu_to_be16(idx);
1120 req->max_ix = cpu_to_be16(1);
1121
1122 return 0;
1123 }
1124
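/*
 * Read a single counter value: build a one-request buffer in this CPU's
 * per-cpu request/result pages, make the hcall, and return the 64-bit
 * counter value through @count.
 */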
1125 static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
1126 {
1127 unsigned long ret;
1128 struct hv_24x7_request_buffer *request_buffer;
1129 struct hv_24x7_data_result_buffer *result_buffer;
1130
1131 BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
1132 BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
1133
1134 request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1135 result_buffer = (void *)get_cpu_var(hv_24x7_resb);
1136
1137 init_24x7_request(request_buffer, result_buffer);
1138
1139 ret = add_event_to_24x7_request(event, request_buffer);
1140 if (ret)
1141 goto out;
1142
1143 ret = make_24x7_request(request_buffer, result_buffer);
1144 if (ret) {
1145 log_24x7_hcall(request_buffer, result_buffer, ret);
1146 goto out;
1147 }
1148
1149 /* process result from hcall */
1150 *count = be64_to_cpu(result_buffer->results[0].elements[0].element_data[0]);
1151
1152 out:
1153 put_cpu_var(hv_24x7_reqb);
1154 put_cpu_var(hv_24x7_resb);
1155 return ret;
1156 }
1157
1158
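/*
 * Validate the event's attributes, check hypervisor capabilities and
 * permissions, and prime hw.prev_count with an initial counter read.
 */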
1159 static int h_24x7_event_init(struct perf_event *event)
1160 {
1161 struct hv_perf_caps caps;
1162 unsigned domain;
1163 unsigned long hret;
1164 u64 ct;
1165
1166 /* Not our event */
1167 if (event->attr.type != event->pmu->type)
1168 return -ENOENT;
1169
1170 /* Unused areas must be 0 */
1171 if (event_get_reserved1(event) ||
1172 event_get_reserved2(event) ||
1173 event_get_reserved3(event)) {
1174 pr_devel("reserved set when forbidden 0x%llx(0x%llx) 0x%llx(0x%llx) 0x%llx(0x%llx)\n",
1175 event->attr.config,
1176 event_get_reserved1(event),
1177 event->attr.config1,
1178 event_get_reserved2(event),
1179 event->attr.config2,
1180 event_get_reserved3(event));
1181 return -EINVAL;
1182 }
1183
1184 /* unsupported modes and filters */
1185 if (event->attr.exclude_user ||
1186 event->attr.exclude_kernel ||
1187 event->attr.exclude_hv ||
1188 event->attr.exclude_idle ||
1189 event->attr.exclude_host ||
1190 event->attr.exclude_guest)
1191 return -EINVAL;
1192
1193 /* no branch sampling */
1194 if (has_branch_stack(event))
1195 return -EOPNOTSUPP;
1196
1197 /* offset must be 8 byte aligned */
1198 if (event_get_offset(event) % 8) {
1199 pr_devel("bad alignment\n");
1200 return -EINVAL;
1201 }
1202
1203 /* Domains above 6 are invalid */
1204 domain = event_get_domain(event);
1205 if (domain > 6) {
1206 pr_devel("invalid domain %d\n", domain);
1207 return -EINVAL;
1208 }
1209
1210 hret = hv_perf_caps_get(&caps);
1211 if (hret) {
1212 pr_devel("could not get capabilities: rc=%ld\n", hret);
1213 return -EIO;
1214 }
1215
1216 /* Physical domains & other lpars require extra capabilities */
1217 if (!caps.collect_privileged && (is_physical_domain(domain) ||
1218 (event_get_lpar(event) != event_get_lpar_max()))) {
1219 pr_devel("hv permissions disallow: is_physical_domain:%d, lpar=0x%llx\n",
1220 is_physical_domain(domain),
1221 event_get_lpar(event));
1222 return -EACCES;
1223 }
1224
1225 /* Get the initial value of the counter for this event */
1226 if (single_24x7_request(event, &ct)) {
1227 pr_devel("test hcall failed\n");
1228 return -EIO;
1229 }
1230 (void)local64_xchg(&event->hw.prev_count, ct);
1231
1232 return 0;
1233 }
1234
1235 static u64 h_24x7_get_value(struct perf_event *event)
1236 {
1237 unsigned long ret;
1238 u64 ct;
1239 ret = single_24x7_request(event, &ct);
1240 if (ret)
1241 /* We checked this in event init, shouldn't fail here... */
1242 return 0;
1243
1244 return ct;
1245 }
1246
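/* Fold the change in the raw 24x7 counter value into event->count. */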
1247 static void update_event_count(struct perf_event *event, u64 now)
1248 {
1249 s64 prev;
1250
1251 prev = local64_xchg(&event->hw.prev_count, now);
1252 local64_add(now - prev, &event->count);
1253 }
1254
1255 static void h_24x7_event_read(struct perf_event *event)
1256 {
1257 u64 now;
1258 struct hv_24x7_request_buffer *request_buffer;
1259 struct hv_24x7_hw *h24x7hw;
1260 int txn_flags;
1261
1262 txn_flags = __this_cpu_read(hv_24x7_txn_flags);
1263
1264 /*
1265 * If in a READ transaction, add this counter to the list of
1266 * counters to read during the next HCALL (i.e. commit_txn()).
1267 * If not in a READ transaction, go ahead and make the HCALL
1268 * to read this counter by itself.
1269 */
1270
1271 if (txn_flags & PERF_PMU_TXN_READ) {
1272 int i;
1273 int ret;
1274
1275 if (__this_cpu_read(hv_24x7_txn_err))
1276 return;
1277
1278 request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1279
1280 ret = add_event_to_24x7_request(event, request_buffer);
1281 if (ret) {
1282 __this_cpu_write(hv_24x7_txn_err, ret);
1283 } else {
1284 /*
1285 * Associate the event with the HCALL request index,
1286 * so ->commit_txn() can quickly find/update count.
1287 */
1288 i = request_buffer->num_requests - 1;
1289
1290 h24x7hw = &get_cpu_var(hv_24x7_hw);
1291 h24x7hw->events[i] = event;
1292 put_cpu_var(hv_24x7_hw);
1293 /*
1294 * Clear the event count so we can compute the _change_
1295 * in the 24x7 raw counter value at the end of the txn.
1296 *
1297 * Note that we could alternatively read the 24x7 value
1298 * now and save its value in event->hw.prev_count. But
1299 * that would require issuing a hcall, which would then
1300 * defeat the purpose of using the txn interface.
1301 */
1302 local64_set(&event->count, 0);
1303 }
1304
1305 put_cpu_var(hv_24x7_reqb);
1306 } else {
1307 now = h_24x7_get_value(event);
1308 update_event_count(event, now);
1309 }
1310 }
1311
1312 static void h_24x7_event_start(struct perf_event *event, int flags)
1313 {
1314 if (flags & PERF_EF_RELOAD)
1315 local64_set(&event->hw.prev_count, h_24x7_get_value(event));
1316 }
1317
1318 static void h_24x7_event_stop(struct perf_event *event, int flags)
1319 {
1320 h_24x7_event_read(event);
1321 }
1322
1323 static int h_24x7_event_add(struct perf_event *event, int flags)
1324 {
1325 if (flags & PERF_EF_START)
1326 h_24x7_event_start(event, flags);
1327
1328 return 0;
1329 }
1330
1331 /*
1332 * 24x7 counters only support READ transactions. They are
1333 * always counting and don't need/support ADD transactions.
1334 * Cache the flags, but otherwise ignore transactions that
1335 * are not PERF_PMU_TXN_READ.
1336 */
1337 static void h_24x7_event_start_txn(struct pmu *pmu, unsigned int flags)
1338 {
1339 struct hv_24x7_request_buffer *request_buffer;
1340 struct hv_24x7_data_result_buffer *result_buffer;
1341
1342 /* We should not be called if we are already in a txn */
1343 WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags));
1344
1345 __this_cpu_write(hv_24x7_txn_flags, flags);
1346 if (flags & ~PERF_PMU_TXN_READ)
1347 return;
1348
1349 request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1350 result_buffer = (void *)get_cpu_var(hv_24x7_resb);
1351
1352 init_24x7_request(request_buffer, result_buffer);
1353
1354 put_cpu_var(hv_24x7_resb);
1355 put_cpu_var(hv_24x7_reqb);
1356 }
1357
1358 /*
1359 * Clean up transaction state.
1360 *
1361 * NOTE: Ignore state of request and result buffers for now.
1362 * We will initialize them during the next read/txn.
1363 */
1364 static void reset_txn(void)
1365 {
1366 __this_cpu_write(hv_24x7_txn_flags, 0);
1367 __this_cpu_write(hv_24x7_txn_err, 0);
1368 }
1369
1370 /*
1371 * 24x7 counters only support READ transactions. They are always counting
1372 * and don't need/support ADD transactions. Clear ->txn_flags but otherwise
1373 * ignore transactions that are not of type PERF_PMU_TXN_READ.
1374 *
1375 * For READ transactions, submit all pending 24x7 requests (i.e. requests
1376 * that were queued by h_24x7_event_read()), to the hypervisor and update
1377 * the event counts.
1378 */
1379 static int h_24x7_event_commit_txn(struct pmu *pmu)
1380 {
1381 struct hv_24x7_request_buffer *request_buffer;
1382 struct hv_24x7_data_result_buffer *result_buffer;
1383 struct hv_24x7_result *resb;
1384 struct perf_event *event;
1385 u64 count;
1386 int i, ret, txn_flags;
1387 struct hv_24x7_hw *h24x7hw;
1388
1389 txn_flags = __this_cpu_read(hv_24x7_txn_flags);
1390 WARN_ON_ONCE(!txn_flags);
1391
1392 ret = 0;
1393 if (txn_flags & ~PERF_PMU_TXN_READ)
1394 goto out;
1395
1396 ret = __this_cpu_read(hv_24x7_txn_err);
1397 if (ret)
1398 goto out;
1399
1400 request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1401 result_buffer = (void *)get_cpu_var(hv_24x7_resb);
1402
1403 ret = make_24x7_request(request_buffer, result_buffer);
1404 if (ret) {
1405 log_24x7_hcall(request_buffer, result_buffer, ret);
1406 goto put_reqb;
1407 }
1408
1409 h24x7hw = &get_cpu_var(hv_24x7_hw);
1410
1411 /* Update event counts from hcall */
1412 for (i = 0; i < request_buffer->num_requests; i++) {
1413 resb = &result_buffer->results[i];
1414 count = be64_to_cpu(resb->elements[0].element_data[0]);
1415 event = h24x7hw->events[i];
1416 h24x7hw->events[i] = NULL;
1417 update_event_count(event, count);
1418 }
1419
1420 put_cpu_var(hv_24x7_hw);
1421
1422 put_reqb:
1423 put_cpu_var(hv_24x7_resb);
1424 put_cpu_var(hv_24x7_reqb);
1425 out:
1426 reset_txn();
1427 return ret;
1428 }
1429
1430 /*
1431 * 24x7 counters only support READ transactions. They are always counting
1432 * and don't need/support ADD transactions. However, regardless of type
1433 * of transaction, all we need to do is cleanup, so we don't have to check
1434 * the type of transaction.
1435 */
1436 static void h_24x7_event_cancel_txn(struct pmu *pmu)
1437 {
1438 WARN_ON_ONCE(!__this_cpu_read(hv_24x7_txn_flags));
1439 reset_txn();
1440 }
1441
1442 static struct pmu h_24x7_pmu = {
1443 .task_ctx_nr = perf_invalid_context,
1444
1445 .name = "hv_24x7",
1446 .attr_groups = attr_groups,
1447 .event_init = h_24x7_event_init,
1448 .add = h_24x7_event_add,
1449 .del = h_24x7_event_stop,
1450 .start = h_24x7_event_start,
1451 .stop = h_24x7_event_stop,
1452 .read = h_24x7_event_read,
1453 .start_txn = h_24x7_event_start_txn,
1454 .commit_txn = h_24x7_event_commit_txn,
1455 .cancel_txn = h_24x7_event_cancel_txn,
1456 };
1457
1458 static int hv_24x7_init(void)
1459 {
1460 int r;
1461 unsigned long hret;
1462 struct hv_perf_caps caps;
1463
1464 if (!firmware_has_feature(FW_FEATURE_LPAR)) {
1465 pr_debug("not a virtualized system, not enabling\n");
1466 return -ENODEV;
1467 }
1468
1469 hret = hv_perf_caps_get(&caps);
1470 if (hret) {
1471 pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
1472 hret);
1473 return -ENODEV;
1474 }
1475
1476 hv_page_cache = kmem_cache_create("hv-page-4096", 4096, 4096, 0, NULL);
1477 if (!hv_page_cache)
1478 return -ENOMEM;
1479
1480 /* sampling not supported */
1481 h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
1482
1483 r = create_events_from_catalog(&event_group.attrs,
1484 &event_desc_group.attrs,
1485 &event_long_desc_group.attrs);
1486
1487 if (r)
1488 return r;
1489
1490 r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
1491 if (r)
1492 return r;
1493
1494 return 0;
1495 }
1496
1497 device_initcall(hv_24x7_init);