]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - arch/powerpc/perf/imc-pmu.c
Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[mirror_ubuntu-jammy-kernel.git] / arch / powerpc / perf / imc-pmu.c
CommitLineData
f4344b19 1// SPDX-License-Identifier: GPL-2.0-or-later
885dcd70
AS
2/*
3 * In-Memory Collection (IMC) Performance Monitor counter support.
4 *
5 * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
6 * (C) 2017 Anju T Sudhakar, IBM Corporation.
7 * (C) 2017 Hemant K Shaw, IBM Corporation.
885dcd70
AS
8 */
9#include <linux/perf_event.h>
10#include <linux/slab.h>
11#include <asm/opal.h>
12#include <asm/imc-pmu.h>
13#include <asm/cputhreads.h>
14#include <asm/smp.h>
15#include <linux/string.h>
16
17/* Nest IMC data structures and variables */
18
19/*
20 * Used to avoid races in counting the nest-pmu units during hotplug
21 * register and unregister
22 */
23static DEFINE_MUTEX(nest_init_lock);
24static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
73ce9aec 25static struct imc_pmu **per_nest_pmu_arr;
885dcd70 26static cpumask_t nest_imc_cpumask;
4851f750 27static struct imc_pmu_ref *nest_imc_refc;
885dcd70
AS
28static int nest_pmus;
29
39a846db
AS
30/* Core IMC data structures and variables */
31
32static cpumask_t core_imc_cpumask;
4851f750 33static struct imc_pmu_ref *core_imc_refc;
39a846db
AS
34static struct imc_pmu *core_imc_pmu;
35
f74c89bd
AS
36/* Thread IMC data structures and variables */
37
38static DEFINE_PER_CPU(u64 *, thread_imc_mem);
25af86b2 39static struct imc_pmu *thread_imc_pmu;
f74c89bd
AS
40static int thread_imc_mem_size;
41
72c69dcd
AS
42/* Trace IMC data structures */
43static DEFINE_PER_CPU(u64 *, trace_imc_mem);
44static struct imc_pmu_ref *trace_imc_refc;
45static int trace_imc_mem_size;
46
4851f750 47static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
885dcd70
AS
48{
49 return container_of(event->pmu, struct imc_pmu, pmu);
50}
51
012ae244 52PMU_FORMAT_ATTR(event, "config:0-61");
885dcd70
AS
53PMU_FORMAT_ATTR(offset, "config:0-31");
54PMU_FORMAT_ATTR(rvalue, "config:32");
55PMU_FORMAT_ATTR(mode, "config:33-40");
56static struct attribute *imc_format_attrs[] = {
57 &format_attr_event.attr,
58 &format_attr_offset.attr,
59 &format_attr_rvalue.attr,
60 &format_attr_mode.attr,
61 NULL,
62};
63
64static struct attribute_group imc_format_group = {
65 .name = "format",
66 .attrs = imc_format_attrs,
67};
68
012ae244
AS
69/* Format attribute for imc trace-mode */
70PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19");
71PMU_FORMAT_ATTR(cpmc_event, "config:20-27");
72PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29");
73PMU_FORMAT_ATTR(cpmc_load, "config:30-61");
74static struct attribute *trace_imc_format_attrs[] = {
75 &format_attr_event.attr,
76 &format_attr_cpmc_reserved.attr,
77 &format_attr_cpmc_event.attr,
78 &format_attr_cpmc_samplesel.attr,
79 &format_attr_cpmc_load.attr,
80 NULL,
81};
82
83static struct attribute_group trace_imc_format_group = {
84.name = "format",
85.attrs = trace_imc_format_attrs,
86};
87
885dcd70
AS
88/* Get the cpumask printed to a buffer "buf" */
89static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
90 struct device_attribute *attr,
91 char *buf)
92{
93 struct pmu *pmu = dev_get_drvdata(dev);
94 struct imc_pmu *imc_pmu = container_of(pmu, struct imc_pmu, pmu);
95 cpumask_t *active_mask;
96
885dcd70
AS
97 switch(imc_pmu->domain){
98 case IMC_DOMAIN_NEST:
99 active_mask = &nest_imc_cpumask;
100 break;
39a846db
AS
101 case IMC_DOMAIN_CORE:
102 active_mask = &core_imc_cpumask;
103 break;
885dcd70
AS
104 default:
105 return 0;
106 }
107
108 return cpumap_print_to_pagebuf(true, buf, active_mask);
109}
110
111static DEVICE_ATTR(cpumask, S_IRUGO, imc_pmu_cpumask_get_attr, NULL);
112
113static struct attribute *imc_pmu_cpumask_attrs[] = {
114 &dev_attr_cpumask.attr,
115 NULL,
116};
117
118static struct attribute_group imc_pmu_cpumask_attr_group = {
119 .attrs = imc_pmu_cpumask_attrs,
120};
121
122/* device_str_attr_create : Populate event "name" and string "str" in attribute */
123static struct attribute *device_str_attr_create(const char *name, const char *str)
124{
125 struct perf_pmu_events_attr *attr;
126
127 attr = kzalloc(sizeof(*attr), GFP_KERNEL);
128 if (!attr)
129 return NULL;
130 sysfs_attr_init(&attr->attr.attr);
131
132 attr->event_str = str;
133 attr->attr.attr.name = name;
134 attr->attr.attr.mode = 0444;
135 attr->attr.show = perf_event_sysfs_show;
136
137 return &attr->attr.attr;
138}
139
8b4e6dea
AS
140static int imc_parse_event(struct device_node *np, const char *scale,
141 const char *unit, const char *prefix,
142 u32 base, struct imc_events *event)
885dcd70 143{
885dcd70
AS
144 const char *s;
145 u32 reg;
146
885dcd70
AS
147 if (of_property_read_u32(np, "reg", &reg))
148 goto error;
149 /* Add the base_reg value to the "reg" */
150 event->value = base + reg;
151
152 if (of_property_read_string(np, "event-name", &s))
153 goto error;
154
155 event->name = kasprintf(GFP_KERNEL, "%s%s", prefix, s);
156 if (!event->name)
157 goto error;
158
159 if (of_property_read_string(np, "scale", &s))
160 s = scale;
161
162 if (s) {
163 event->scale = kstrdup(s, GFP_KERNEL);
164 if (!event->scale)
165 goto error;
166 }
167
168 if (of_property_read_string(np, "unit", &s))
169 s = unit;
170
171 if (s) {
172 event->unit = kstrdup(s, GFP_KERNEL);
173 if (!event->unit)
174 goto error;
175 }
176
8b4e6dea 177 return 0;
885dcd70
AS
178error:
179 kfree(event->unit);
180 kfree(event->scale);
181 kfree(event->name);
8b4e6dea
AS
182 return -EINVAL;
183}
184
185/*
186 * imc_free_events: Function to cleanup the events list, having
187 * "nr_entries".
188 */
189static void imc_free_events(struct imc_events *events, int nr_entries)
190{
191 int i;
192
193 /* Nothing to clean, return */
194 if (!events)
195 return;
196 for (i = 0; i < nr_entries; i++) {
197 kfree(events[i].unit);
198 kfree(events[i].scale);
199 kfree(events[i].name);
200 }
885dcd70 201
8b4e6dea 202 kfree(events);
885dcd70
AS
203}
204
205/*
206 * update_events_in_group: Update the "events" information in an attr_group
207 * and assign the attr_group to the pmu "pmu".
208 */
209static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
210{
211 struct attribute_group *attr_group;
212 struct attribute **attrs, *dev_str;
213 struct device_node *np, *pmu_events;
885dcd70 214 u32 handle, base_reg;
8b4e6dea 215 int i = 0, j = 0, ct, ret;
885dcd70
AS
216 const char *prefix, *g_scale, *g_unit;
217 const char *ev_val_str, *ev_scale_str, *ev_unit_str;
218
219 if (!of_property_read_u32(node, "events", &handle))
220 pmu_events = of_find_node_by_phandle(handle);
221 else
222 return 0;
223
224 /* Did not find any node with a given phandle */
225 if (!pmu_events)
226 return 0;
227
228 /* Get a count of number of child nodes */
229 ct = of_get_child_count(pmu_events);
230
231 /* Get the event prefix */
232 if (of_property_read_string(node, "events-prefix", &prefix))
233 return 0;
234
235 /* Get a global unit and scale data if available */
236 if (of_property_read_string(node, "scale", &g_scale))
237 g_scale = NULL;
238
239 if (of_property_read_string(node, "unit", &g_unit))
240 g_unit = NULL;
241
242 /* "reg" property gives out the base offset of the counters data */
243 of_property_read_u32(node, "reg", &base_reg);
244
245 /* Allocate memory for the events */
246 pmu->events = kcalloc(ct, sizeof(struct imc_events), GFP_KERNEL);
247 if (!pmu->events)
248 return -ENOMEM;
249
250 ct = 0;
251 /* Parse the events and update the struct */
252 for_each_child_of_node(pmu_events, np) {
8b4e6dea
AS
253 ret = imc_parse_event(np, g_scale, g_unit, prefix, base_reg, &pmu->events[ct]);
254 if (!ret)
255 ct++;
885dcd70
AS
256 }
257
258 /* Allocate memory for attribute group */
259 attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
8b4e6dea
AS
260 if (!attr_group) {
261 imc_free_events(pmu->events, ct);
885dcd70 262 return -ENOMEM;
8b4e6dea 263 }
885dcd70
AS
264
265 /*
266 * Allocate memory for attributes.
267 * Since we have count of events for this pmu, we also allocate
268 * memory for the scale and unit attribute for now.
269 * "ct" has the total event structs added from the events-parent node.
270 * So allocate three times the "ct" (this includes event, event_scale and
271 * event_unit).
272 */
273 attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL);
274 if (!attrs) {
275 kfree(attr_group);
8b4e6dea 276 imc_free_events(pmu->events, ct);
885dcd70
AS
277 return -ENOMEM;
278 }
279
280 attr_group->name = "events";
281 attr_group->attrs = attrs;
282 do {
8b4e6dea
AS
283 ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value);
284 dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str);
885dcd70
AS
285 if (!dev_str)
286 continue;
287
288 attrs[j++] = dev_str;
8b4e6dea
AS
289 if (pmu->events[i].scale) {
290 ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name);
291 dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale);
885dcd70
AS
292 if (!dev_str)
293 continue;
294
295 attrs[j++] = dev_str;
296 }
297
8b4e6dea
AS
298 if (pmu->events[i].unit) {
299 ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name);
300 dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit);
885dcd70
AS
301 if (!dev_str)
302 continue;
303
304 attrs[j++] = dev_str;
305 }
306 } while (++i < ct);
307
308 /* Save the event attribute */
309 pmu->attr_groups[IMC_EVENT_ATTR] = attr_group;
310
885dcd70
AS
311 return 0;
312}
313
314/* get_nest_pmu_ref: Return the imc_pmu_ref struct for the given node */
315static struct imc_pmu_ref *get_nest_pmu_ref(int cpu)
316{
317 return per_cpu(local_nest_imc_refc, cpu);
318}
319
320static void nest_change_cpu_context(int old_cpu, int new_cpu)
321{
322 struct imc_pmu **pn = per_nest_pmu_arr;
885dcd70
AS
323
324 if (old_cpu < 0 || new_cpu < 0)
325 return;
326
73ce9aec 327 while (*pn) {
885dcd70 328 perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu);
73ce9aec
MS
329 pn++;
330 }
885dcd70
AS
331}
332
333static int ppc_nest_imc_cpu_offline(unsigned int cpu)
334{
335 int nid, target = -1;
336 const struct cpumask *l_cpumask;
337 struct imc_pmu_ref *ref;
338
339 /*
340 * Check in the designated list for this cpu. Dont bother
341 * if not one of them.
342 */
343 if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
344 return 0;
345
ad2b6e01
AS
346 /*
347 * Check whether nest_imc is registered. We could end up here if the
348 * cpuhotplug callback registration fails. i.e, callback invokes the
349 * offline path for all successfully registered nodes. At this stage,
350 * nest_imc pmu will not be registered and we should return here.
351 *
352 * We return with a zero since this is not an offline failure. And
353 * cpuhp_setup_state() returns the actual failure reason to the caller,
354 * which in turn will call the cleanup routine.
355 */
356 if (!nest_pmus)
357 return 0;
358
885dcd70
AS
359 /*
360 * Now that this cpu is one of the designated,
361 * find a next cpu a) which is online and b) in same chip.
362 */
363 nid = cpu_to_node(cpu);
364 l_cpumask = cpumask_of_node(nid);
365 target = cpumask_any_but(l_cpumask, cpu);
366
367 /*
368 * Update the cpumask with the target cpu and
369 * migrate the context if needed
370 */
371 if (target >= 0 && target < nr_cpu_ids) {
372 cpumask_set_cpu(target, &nest_imc_cpumask);
373 nest_change_cpu_context(cpu, target);
374 } else {
375 opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
376 get_hard_smp_processor_id(cpu));
377 /*
378 * If this is the last cpu in this chip then, skip the reference
379 * count mutex lock and make the reference count on this chip zero.
380 */
381 ref = get_nest_pmu_ref(cpu);
382 if (!ref)
383 return -EINVAL;
384
385 ref->refc = 0;
386 }
387 return 0;
388}
389
390static int ppc_nest_imc_cpu_online(unsigned int cpu)
391{
392 const struct cpumask *l_cpumask;
393 static struct cpumask tmp_mask;
394 int res;
395
396 /* Get the cpumask of this node */
397 l_cpumask = cpumask_of_node(cpu_to_node(cpu));
398
399 /*
400 * If this is not the first online CPU on this node, then
401 * just return.
402 */
403 if (cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask))
404 return 0;
405
406 /*
407 * If this is the first online cpu on this node
408 * disable the nest counters by making an OPAL call.
409 */
410 res = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
411 get_hard_smp_processor_id(cpu));
412 if (res)
413 return res;
414
415 /* Make this CPU the designated target for counter collection */
416 cpumask_set_cpu(cpu, &nest_imc_cpumask);
417 return 0;
418}
419
420static int nest_pmu_cpumask_init(void)
421{
422 return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
423 "perf/powerpc/imc:online",
424 ppc_nest_imc_cpu_online,
425 ppc_nest_imc_cpu_offline);
426}
427
428static void nest_imc_counters_release(struct perf_event *event)
429{
430 int rc, node_id;
431 struct imc_pmu_ref *ref;
432
433 if (event->cpu < 0)
434 return;
435
436 node_id = cpu_to_node(event->cpu);
437
438 /*
439 * See if we need to disable the nest PMU.
440 * If no events are currently in use, then we have to take a
441 * mutex to ensure that we don't race with another task doing
442 * enable or disable the nest counters.
443 */
444 ref = get_nest_pmu_ref(event->cpu);
445 if (!ref)
446 return;
447
448 /* Take the mutex lock for this node and then decrement the reference count */
449 mutex_lock(&ref->lock);
0d923820
AS
450 if (ref->refc == 0) {
451 /*
452 * The scenario where this is true is, when perf session is
453 * started, followed by offlining of all cpus in a given node.
454 *
455 * In the cpuhotplug offline path, ppc_nest_imc_cpu_offline()
456 * function set the ref->count to zero, if the cpu which is
457 * about to offline is the last cpu in a given node and make
458 * an OPAL call to disable the engine in that node.
459 *
460 */
461 mutex_unlock(&ref->lock);
462 return;
463 }
885dcd70
AS
464 ref->refc--;
465 if (ref->refc == 0) {
466 rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
467 get_hard_smp_processor_id(event->cpu));
468 if (rc) {
711bd207 469 mutex_unlock(&ref->lock);
885dcd70
AS
470 pr_err("nest-imc: Unable to stop the counters for core %d\n", node_id);
471 return;
472 }
473 } else if (ref->refc < 0) {
474 WARN(1, "nest-imc: Invalid event reference count\n");
475 ref->refc = 0;
476 }
477 mutex_unlock(&ref->lock);
478}
479
480static int nest_imc_event_init(struct perf_event *event)
481{
482 int chip_id, rc, node_id;
483 u32 l_config, config = event->attr.config;
484 struct imc_mem_info *pcni;
485 struct imc_pmu *pmu;
486 struct imc_pmu_ref *ref;
487 bool flag = false;
488
489 if (event->attr.type != event->pmu->type)
490 return -ENOENT;
491
492 /* Sampling not supported */
493 if (event->hw.sample_period)
494 return -EINVAL;
495
885dcd70
AS
496 if (event->cpu < 0)
497 return -EINVAL;
498
499 pmu = imc_event_to_pmu(event);
500
501 /* Sanity check for config (event offset) */
502 if ((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size)
503 return -EINVAL;
504
505 /*
506 * Nest HW counter memory resides in a per-chip reserve-memory (HOMER).
507 * Get the base memory addresss for this cpu.
508 */
f3f1dfd6 509 chip_id = cpu_to_chip_id(event->cpu);
a913e5e8
AS
510
511 /* Return, if chip_id is not valid */
512 if (chip_id < 0)
513 return -ENODEV;
514
885dcd70
AS
515 pcni = pmu->mem_info;
516 do {
517 if (pcni->id == chip_id) {
518 flag = true;
519 break;
520 }
521 pcni++;
860b7d22 522 } while (pcni->vbase != 0);
885dcd70
AS
523
524 if (!flag)
525 return -ENODEV;
526
527 /*
528 * Add the event offset to the base address.
529 */
530 l_config = config & IMC_EVENT_OFFSET_MASK;
531 event->hw.event_base = (u64)pcni->vbase + l_config;
532 node_id = cpu_to_node(event->cpu);
533
534 /*
535 * Get the imc_pmu_ref struct for this node.
536 * Take the mutex lock and then increment the count of nest pmu events
537 * inited.
538 */
539 ref = get_nest_pmu_ref(event->cpu);
540 if (!ref)
541 return -EINVAL;
542
543 mutex_lock(&ref->lock);
544 if (ref->refc == 0) {
545 rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST,
546 get_hard_smp_processor_id(event->cpu));
547 if (rc) {
711bd207 548 mutex_unlock(&ref->lock);
885dcd70
AS
549 pr_err("nest-imc: Unable to start the counters for node %d\n",
550 node_id);
551 return rc;
552 }
553 }
554 ++ref->refc;
555 mutex_unlock(&ref->lock);
556
557 event->destroy = nest_imc_counters_release;
558 return 0;
559}
560
39a846db
AS
561/*
562 * core_imc_mem_init : Initializes memory for the current core.
563 *
564 * Uses alloc_pages_node() and uses the returned address as an argument to
565 * an opal call to configure the pdbar. The address sent as an argument is
566 * converted to physical address before the opal call is made. This is the
567 * base address at which the core imc counters are populated.
568 */
569static int core_imc_mem_init(int cpu, int size)
570{
f3f1dfd6 571 int nid, rc = 0, core_id = (cpu / threads_per_core);
39a846db
AS
572 struct imc_mem_info *mem_info;
573
574 /*
575 * alloc_pages_node() will allocate memory for core in the
576 * local node only.
577 */
f3f1dfd6 578 nid = cpu_to_node(cpu);
39a846db
AS
579 mem_info = &core_imc_pmu->mem_info[core_id];
580 mem_info->id = core_id;
581
582 /* We need only vbase for core counters */
f3f1dfd6 583 mem_info->vbase = page_address(alloc_pages_node(nid,
cd4f2b30
AS
584 GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
585 __GFP_NOWARN, get_order(size)));
39a846db
AS
586 if (!mem_info->vbase)
587 return -ENOMEM;
588
589 /* Init the mutex */
590 core_imc_refc[core_id].id = core_id;
591 mutex_init(&core_imc_refc[core_id].lock);
592
593 rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
594 __pa((void *)mem_info->vbase),
595 get_hard_smp_processor_id(cpu));
596 if (rc) {
597 free_pages((u64)mem_info->vbase, get_order(size));
598 mem_info->vbase = NULL;
599 }
600
601 return rc;
602}
603
604static bool is_core_imc_mem_inited(int cpu)
605{
606 struct imc_mem_info *mem_info;
607 int core_id = (cpu / threads_per_core);
608
609 mem_info = &core_imc_pmu->mem_info[core_id];
610 if (!mem_info->vbase)
611 return false;
612
613 return true;
614}
615
616static int ppc_core_imc_cpu_online(unsigned int cpu)
617{
618 const struct cpumask *l_cpumask;
619 static struct cpumask tmp_mask;
620 int ret = 0;
621
622 /* Get the cpumask for this core */
623 l_cpumask = cpu_sibling_mask(cpu);
624
625 /* If a cpu for this core is already set, then, don't do anything */
626 if (cpumask_and(&tmp_mask, l_cpumask, &core_imc_cpumask))
627 return 0;
628
629 if (!is_core_imc_mem_inited(cpu)) {
630 ret = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size);
631 if (ret) {
632 pr_info("core_imc memory allocation for cpu %d failed\n", cpu);
633 return ret;
634 }
635 }
636
637 /* set the cpu in the mask */
638 cpumask_set_cpu(cpu, &core_imc_cpumask);
639 return 0;
640}
641
642static int ppc_core_imc_cpu_offline(unsigned int cpu)
643{
074db39e
AS
644 unsigned int core_id;
645 int ncpu;
39a846db
AS
646 struct imc_pmu_ref *ref;
647
648 /*
649 * clear this cpu out of the mask, if not present in the mask,
650 * don't bother doing anything.
651 */
652 if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
653 return 0;
654
7ecb37f6
MS
655 /*
656 * Check whether core_imc is registered. We could end up here
657 * if the cpuhotplug callback registration fails. i.e, callback
658 * invokes the offline path for all sucessfully registered cpus.
659 * At this stage, core_imc pmu will not be registered and we
660 * should return here.
661 *
662 * We return with a zero since this is not an offline failure.
663 * And cpuhp_setup_state() returns the actual failure reason
664 * to the caller, which inturn will call the cleanup routine.
665 */
666 if (!core_imc_pmu->pmu.event_init)
667 return 0;
668
39a846db
AS
669 /* Find any online cpu in that core except the current "cpu" */
670 ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
671
672 if (ncpu >= 0 && ncpu < nr_cpu_ids) {
673 cpumask_set_cpu(ncpu, &core_imc_cpumask);
674 perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu);
675 } else {
676 /*
677 * If this is the last cpu in this core then, skip taking refernce
678 * count mutex lock for this core and directly zero "refc" for
679 * this core.
680 */
681 opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
682 get_hard_smp_processor_id(cpu));
683 core_id = cpu / threads_per_core;
684 ref = &core_imc_refc[core_id];
685 if (!ref)
686 return -EINVAL;
687
688 ref->refc = 0;
689 }
690 return 0;
691}
692
693static int core_imc_pmu_cpumask_init(void)
694{
695 return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
696 "perf/powerpc/imc_core:online",
697 ppc_core_imc_cpu_online,
698 ppc_core_imc_cpu_offline);
699}
700
701static void core_imc_counters_release(struct perf_event *event)
702{
703 int rc, core_id;
704 struct imc_pmu_ref *ref;
705
706 if (event->cpu < 0)
707 return;
708 /*
709 * See if we need to disable the IMC PMU.
710 * If no events are currently in use, then we have to take a
711 * mutex to ensure that we don't race with another task doing
712 * enable or disable the core counters.
713 */
714 core_id = event->cpu / threads_per_core;
715
716 /* Take the mutex lock and decrement the refernce count for this core */
717 ref = &core_imc_refc[core_id];
718 if (!ref)
719 return;
720
721 mutex_lock(&ref->lock);
0d923820
AS
722 if (ref->refc == 0) {
723 /*
724 * The scenario where this is true is, when perf session is
725 * started, followed by offlining of all cpus in a given core.
726 *
727 * In the cpuhotplug offline path, ppc_core_imc_cpu_offline()
728 * function set the ref->count to zero, if the cpu which is
729 * about to offline is the last cpu in a given core and make
730 * an OPAL call to disable the engine in that core.
731 *
732 */
733 mutex_unlock(&ref->lock);
734 return;
735 }
39a846db
AS
736 ref->refc--;
737 if (ref->refc == 0) {
738 rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
739 get_hard_smp_processor_id(event->cpu));
740 if (rc) {
741 mutex_unlock(&ref->lock);
742 pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
743 return;
744 }
745 } else if (ref->refc < 0) {
746 WARN(1, "core-imc: Invalid event reference count\n");
747 ref->refc = 0;
748 }
749 mutex_unlock(&ref->lock);
750}
751
752static int core_imc_event_init(struct perf_event *event)
753{
754 int core_id, rc;
755 u64 config = event->attr.config;
756 struct imc_mem_info *pcmi;
757 struct imc_pmu *pmu;
758 struct imc_pmu_ref *ref;
759
760 if (event->attr.type != event->pmu->type)
761 return -ENOENT;
762
763 /* Sampling not supported */
764 if (event->hw.sample_period)
765 return -EINVAL;
766
39a846db
AS
767 if (event->cpu < 0)
768 return -EINVAL;
769
770 event->hw.idx = -1;
771 pmu = imc_event_to_pmu(event);
772
773 /* Sanity check for config (event offset) */
774 if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
775 return -EINVAL;
776
777 if (!is_core_imc_mem_inited(event->cpu))
778 return -ENODEV;
779
780 core_id = event->cpu / threads_per_core;
781 pcmi = &core_imc_pmu->mem_info[core_id];
782 if ((!pcmi->vbase))
783 return -ENODEV;
784
785 /* Get the core_imc mutex for this core */
786 ref = &core_imc_refc[core_id];
787 if (!ref)
788 return -EINVAL;
789
790 /*
791 * Core pmu units are enabled only when it is used.
792 * See if this is triggered for the first time.
793 * If yes, take the mutex lock and enable the core counters.
794 * If not, just increment the count in core_imc_refc struct.
795 */
796 mutex_lock(&ref->lock);
797 if (ref->refc == 0) {
798 rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
799 get_hard_smp_processor_id(event->cpu));
800 if (rc) {
801 mutex_unlock(&ref->lock);
802 pr_err("core-imc: Unable to start the counters for core %d\n",
803 core_id);
804 return rc;
805 }
806 }
807 ++ref->refc;
808 mutex_unlock(&ref->lock);
809
810 event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
811 event->destroy = core_imc_counters_release;
812 return 0;
813}
814
f74c89bd 815/*
dd50cf7c
AS
816 * Allocates a page of memory for each of the online cpus, and load
817 * LDBAR with 0.
818 * The physical base address of the page allocated for a cpu will be
819 * written to the LDBAR for that cpu, when the thread-imc event
820 * is added.
f74c89bd
AS
821 *
822 * LDBAR Register Layout:
823 *
824 * 0 4 8 12 16 20 24 28
825 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
826 * | | [ ] [ Counter Address [8:50]
827 * | * Mode |
828 * | * PB Scope
829 * * Enable/Disable
830 *
831 * 32 36 40 44 48 52 56 60
832 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
833 * Counter Address [8:50] ]
834 *
835 */
836static int thread_imc_mem_alloc(int cpu_id, int size)
837{
dd50cf7c 838 u64 *local_mem = per_cpu(thread_imc_mem, cpu_id);
f3f1dfd6 839 int nid = cpu_to_node(cpu_id);
f74c89bd
AS
840
841 if (!local_mem) {
842 /*
843 * This case could happen only once at start, since we dont
844 * free the memory in cpu offline path.
845 */
f3f1dfd6 846 local_mem = page_address(alloc_pages_node(nid,
cd4f2b30
AS
847 GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
848 __GFP_NOWARN, get_order(size)));
f74c89bd
AS
849 if (!local_mem)
850 return -ENOMEM;
851
852 per_cpu(thread_imc_mem, cpu_id) = local_mem;
853 }
854
dd50cf7c 855 mtspr(SPRN_LDBAR, 0);
f74c89bd
AS
856 return 0;
857}
858
859static int ppc_thread_imc_cpu_online(unsigned int cpu)
885dcd70 860{
f74c89bd
AS
861 return thread_imc_mem_alloc(cpu, thread_imc_mem_size);
862}
863
864static int ppc_thread_imc_cpu_offline(unsigned int cpu)
865{
866 mtspr(SPRN_LDBAR, 0);
867 return 0;
868}
869
870static int thread_imc_cpu_init(void)
871{
872 return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
873 "perf/powerpc/imc_thread:online",
874 ppc_thread_imc_cpu_online,
875 ppc_thread_imc_cpu_offline);
876}
877
f74c89bd
AS
878static int thread_imc_event_init(struct perf_event *event)
879{
880 u32 config = event->attr.config;
881 struct task_struct *target;
882 struct imc_pmu *pmu;
883
884 if (event->attr.type != event->pmu->type)
885 return -ENOENT;
886
216c3087
MS
887 if (!capable(CAP_SYS_ADMIN))
888 return -EACCES;
889
f74c89bd
AS
890 /* Sampling not supported */
891 if (event->hw.sample_period)
892 return -EINVAL;
893
894 event->hw.idx = -1;
895 pmu = imc_event_to_pmu(event);
896
897 /* Sanity check for config offset */
898 if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
899 return -EINVAL;
900
901 target = event->hw.target;
902 if (!target)
903 return -EINVAL;
904
905 event->pmu->task_ctx_nr = perf_sw_context;
906 return 0;
907}
908
909static bool is_thread_imc_pmu(struct perf_event *event)
910{
911 if (!strncmp(event->pmu->name, "thread_imc", strlen("thread_imc")))
912 return true;
913
914 return false;
915}
916
917static u64 * get_event_base_addr(struct perf_event *event)
918{
919 u64 addr;
920
921 if (is_thread_imc_pmu(event)) {
922 addr = (u64)per_cpu(thread_imc_mem, smp_processor_id());
923 return (u64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK));
924 }
925
885dcd70
AS
926 return (u64 *)event->hw.event_base;
927}
928
f74c89bd
AS
929static void thread_imc_pmu_start_txn(struct pmu *pmu,
930 unsigned int txn_flags)
931{
932 if (txn_flags & ~PERF_PMU_TXN_ADD)
933 return;
934 perf_pmu_disable(pmu);
935}
936
/* cancel_txn callback: re-enable the PMU. */
static void thread_imc_pmu_cancel_txn(struct pmu *pmu)
{
	struct pmu *txn_pmu = pmu;

	perf_pmu_enable(txn_pmu);
}
941
/* commit_txn callback: re-enable the PMU; always reports success. */
static int thread_imc_pmu_commit_txn(struct pmu *pmu)
{
	struct pmu *txn_pmu = pmu;

	perf_pmu_enable(txn_pmu);
	return 0;
}
947
885dcd70
AS
948static u64 imc_read_counter(struct perf_event *event)
949{
950 u64 *addr, data;
951
952 /*
953 * In-Memory Collection (IMC) counters are free flowing counters.
954 * So we take a snapshot of the counter value on enable and save it
955 * to calculate the delta at later stage to present the event counter
956 * value.
957 */
958 addr = get_event_base_addr(event);
959 data = be64_to_cpu(READ_ONCE(*addr));
960 local64_set(&event->hw.prev_count, data);
961
962 return data;
963}
964
965static void imc_event_update(struct perf_event *event)
966{
967 u64 counter_prev, counter_new, final_count;
968
969 counter_prev = local64_read(&event->hw.prev_count);
970 counter_new = imc_read_counter(event);
971 final_count = counter_new - counter_prev;
972
973 /* Update the delta to the event count */
974 local64_add(final_count, &event->count);
975}
976
/*
 * start callback. In Memory Counters are free flowing: HW or the microcode
 * keeps adding to the counter in memory. Starting an event therefore only
 * records the current value, so that a delta can be computed later.
 * "flags" is not used.
 */
static void imc_event_start(struct perf_event *event, int flags)
{
	(void)imc_read_counter(event);
}
987
/*
 * stop callback: fold the counter's advance since the last snapshot into
 * the event count. "flags" is not used.
 */
static void imc_event_stop(struct perf_event *event, int flags)
{
	struct perf_event *ev = event;

	imc_event_update(ev);
}
996
997static int imc_event_add(struct perf_event *event, int flags)
998{
999 if (flags & PERF_EF_START)
1000 imc_event_start(event, flags);
1001
1002 return 0;
1003}
1004
f74c89bd
AS
1005static int thread_imc_event_add(struct perf_event *event, int flags)
1006{
7ccc4fe5
AS
1007 int core_id;
1008 struct imc_pmu_ref *ref;
dd50cf7c 1009 u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id());
7ccc4fe5 1010
f74c89bd
AS
1011 if (flags & PERF_EF_START)
1012 imc_event_start(event, flags);
1013
7ccc4fe5
AS
1014 if (!is_core_imc_mem_inited(smp_processor_id()))
1015 return -EINVAL;
1016
1017 core_id = smp_processor_id() / threads_per_core;
dd50cf7c
AS
1018 ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
1019 mtspr(SPRN_LDBAR, ldbar_value);
1020
7ccc4fe5
AS
1021 /*
1022 * imc pmus are enabled only when it is used.
1023 * See if this is triggered for the first time.
1024 * If yes, take the mutex lock and enable the counters.
1025 * If not, just increment the count in ref count struct.
1026 */
1027 ref = &core_imc_refc[core_id];
1028 if (!ref)
1029 return -EINVAL;
1030
1031 mutex_lock(&ref->lock);
1032 if (ref->refc == 0) {
1033 if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
1034 get_hard_smp_processor_id(smp_processor_id()))) {
1035 mutex_unlock(&ref->lock);
1036 pr_err("thread-imc: Unable to start the counter\
1037 for core %d\n", core_id);
1038 return -EINVAL;
1039 }
1040 }
1041 ++ref->refc;
1042 mutex_unlock(&ref->lock);
f74c89bd
AS
1043 return 0;
1044}
1045
1046static void thread_imc_event_del(struct perf_event *event, int flags)
1047{
7ccc4fe5
AS
1048
1049 int core_id;
1050 struct imc_pmu_ref *ref;
1051
dd50cf7c 1052 mtspr(SPRN_LDBAR, 0);
7ccc4fe5
AS
1053
1054 core_id = smp_processor_id() / threads_per_core;
1055 ref = &core_imc_refc[core_id];
1056
1057 mutex_lock(&ref->lock);
1058 ref->refc--;
1059 if (ref->refc == 0) {
1060 if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
1061 get_hard_smp_processor_id(smp_processor_id()))) {
1062 mutex_unlock(&ref->lock);
1063 pr_err("thread-imc: Unable to stop the counters\
1064 for core %d\n", core_id);
1065 return;
1066 }
1067 } else if (ref->refc < 0) {
1068 ref->refc = 0;
1069 }
1070 mutex_unlock(&ref->lock);
dd50cf7c
AS
1071 /*
1072 * Take a snapshot and calculate the delta and update
1073 * the event counter values.
1074 */
1075 imc_event_update(event);
f74c89bd
AS
1076}
1077
72c69dcd
AS
1078/*
1079 * Allocate a page of memory for each cpu, and load LDBAR with 0.
1080 */
1081static int trace_imc_mem_alloc(int cpu_id, int size)
1082{
1083 u64 *local_mem = per_cpu(trace_imc_mem, cpu_id);
1084 int phys_id = cpu_to_node(cpu_id), rc = 0;
1085 int core_id = (cpu_id / threads_per_core);
1086
1087 if (!local_mem) {
1088 local_mem = page_address(alloc_pages_node(phys_id,
1089 GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
1090 __GFP_NOWARN, get_order(size)));
1091 if (!local_mem)
1092 return -ENOMEM;
1093 per_cpu(trace_imc_mem, cpu_id) = local_mem;
1094
1095 /* Initialise the counters for trace mode */
1096 rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void *)local_mem),
1097 get_hard_smp_processor_id(cpu_id));
1098 if (rc) {
1099 pr_info("IMC:opal init failed for trace imc\n");
1100 return rc;
1101 }
1102 }
1103
1104 /* Init the mutex, if not already */
1105 trace_imc_refc[core_id].id = core_id;
1106 mutex_init(&trace_imc_refc[core_id].lock);
1107
1108 mtspr(SPRN_LDBAR, 0);
1109 return 0;
1110}
1111
1112static int ppc_trace_imc_cpu_online(unsigned int cpu)
1113{
1114 return trace_imc_mem_alloc(cpu, trace_imc_mem_size);
1115}
1116
1117static int ppc_trace_imc_cpu_offline(unsigned int cpu)
1118{
1119 mtspr(SPRN_LDBAR, 0);
1120 return 0;
1121}
1122
1123static int trace_imc_cpu_init(void)
1124{
1125 return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
1126 "perf/powerpc/imc_trace:online",
1127 ppc_trace_imc_cpu_online,
1128 ppc_trace_imc_cpu_offline);
1129}
1130
012ae244
AS
1131static u64 get_trace_imc_event_base_addr(void)
1132{
1133 return (u64)per_cpu(trace_imc_mem, smp_processor_id());
1134}
1135
1136/*
1137 * Function to parse trace-imc data obtained
1138 * and to prepare the perf sample.
1139 */
1140static int trace_imc_prepare_sample(struct trace_imc_data *mem,
1141 struct perf_sample_data *data,
1142 u64 *prev_tb,
1143 struct perf_event_header *header,
1144 struct perf_event *event)
1145{
1146 /* Sanity checks for a valid record */
1147 if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb)
1148 *prev_tb = be64_to_cpu(READ_ONCE(mem->tb1));
1149 else
1150 return -EINVAL;
1151
1152 if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) !=
1153 be64_to_cpu(READ_ONCE(mem->tb2)))
1154 return -EINVAL;
1155
1156 /* Prepare perf sample */
1157 data->ip = be64_to_cpu(READ_ONCE(mem->ip));
1158 data->period = event->hw.last_period;
1159
1160 header->type = PERF_RECORD_SAMPLE;
1161 header->size = sizeof(*header) + event->header_size;
1162 header->misc = 0;
1163
1164 if (is_kernel_addr(data->ip))
1165 header->misc |= PERF_RECORD_MISC_KERNEL;
1166 else
1167 header->misc |= PERF_RECORD_MISC_USER;
1168
1169 perf_event_header__init_id(header, data, event);
1170
1171 return 0;
1172}
1173
1174static void dump_trace_imc_data(struct perf_event *event)
1175{
1176 struct trace_imc_data *mem;
1177 int i, ret;
1178 u64 prev_tb = 0;
1179
1180 mem = (struct trace_imc_data *)get_trace_imc_event_base_addr();
1181 for (i = 0; i < (trace_imc_mem_size / sizeof(struct trace_imc_data));
1182 i++, mem++) {
1183 struct perf_sample_data data;
1184 struct perf_event_header header;
1185
1186 ret = trace_imc_prepare_sample(mem, &data, &prev_tb, &header, event);
1187 if (ret) /* Exit, if not a valid record */
1188 break;
1189 else {
1190 /* If this is a valid record, create the sample */
1191 struct perf_output_handle handle;
1192
1193 if (perf_output_begin(&handle, event, header.size))
1194 return;
1195
1196 perf_output_sample(&handle, &header, &data, event);
1197 perf_output_end(&handle);
1198 }
1199 }
1200}
1201
1202static int trace_imc_event_add(struct perf_event *event, int flags)
1203{
1204 int core_id = smp_processor_id() / threads_per_core;
1205 struct imc_pmu_ref *ref = NULL;
1206 u64 local_mem, ldbar_value;
1207
1208 /* Set trace-imc bit in ldbar and load ldbar with per-thread memory address */
1209 local_mem = get_trace_imc_event_base_addr();
1210 ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE;
1211
1212 if (core_imc_refc)
1213 ref = &core_imc_refc[core_id];
1214 if (!ref) {
1215 /* If core-imc is not enabled, use trace-imc reference count */
1216 if (trace_imc_refc)
1217 ref = &trace_imc_refc[core_id];
1218 if (!ref)
1219 return -EINVAL;
1220 }
1221 mtspr(SPRN_LDBAR, ldbar_value);
1222 mutex_lock(&ref->lock);
1223 if (ref->refc == 0) {
1224 if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
1225 get_hard_smp_processor_id(smp_processor_id()))) {
1226 mutex_unlock(&ref->lock);
1227 pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
1228 mtspr(SPRN_LDBAR, 0);
1229 return -EINVAL;
1230 }
1231 }
1232 ++ref->refc;
1233 mutex_unlock(&ref->lock);
1234
1235 return 0;
1236}
1237
/* pmu->read callback for trace-imc: intentionally does nothing. */
static void trace_imc_event_read(struct perf_event *event)
{
}
1242
1243static void trace_imc_event_stop(struct perf_event *event, int flags)
1244{
1245 u64 local_mem = get_trace_imc_event_base_addr();
1246 dump_trace_imc_data(event);
1247 memset((void *)local_mem, 0, sizeof(u64));
1248}
1249
/* pmu->start callback for trace-imc: intentionally does nothing. */
static void trace_imc_event_start(struct perf_event *event, int flags)
{
}
1254
1255static void trace_imc_event_del(struct perf_event *event, int flags)
1256{
1257 int core_id = smp_processor_id() / threads_per_core;
1258 struct imc_pmu_ref *ref = NULL;
1259
1260 if (core_imc_refc)
1261 ref = &core_imc_refc[core_id];
1262 if (!ref) {
1263 /* If core-imc is not enabled, use trace-imc reference count */
1264 if (trace_imc_refc)
1265 ref = &trace_imc_refc[core_id];
1266 if (!ref)
1267 return;
1268 }
1269 mtspr(SPRN_LDBAR, 0);
1270 mutex_lock(&ref->lock);
1271 ref->refc--;
1272 if (ref->refc == 0) {
1273 if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
1274 get_hard_smp_processor_id(smp_processor_id()))) {
1275 mutex_unlock(&ref->lock);
1276 pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id);
1277 return;
1278 }
1279 } else if (ref->refc < 0) {
1280 ref->refc = 0;
1281 }
1282 mutex_unlock(&ref->lock);
1283 trace_imc_event_stop(event, flags);
1284}
1285
1286static int trace_imc_event_init(struct perf_event *event)
1287{
1288 struct task_struct *target;
1289
1290 if (event->attr.type != event->pmu->type)
1291 return -ENOENT;
1292
1293 if (!capable(CAP_SYS_ADMIN))
1294 return -EACCES;
1295
1296 /* Return if this is a couting event */
1297 if (event->attr.sample_period == 0)
1298 return -ENOENT;
1299
1300 event->hw.idx = -1;
1301 target = event->hw.target;
1302
1303 event->pmu->task_ctx_nr = perf_hw_context;
1304 return 0;
1305}
1306
885dcd70
AS
1307/* update_pmu_ops : Populate the appropriate operations for "pmu" */
1308static int update_pmu_ops(struct imc_pmu *pmu)
1309{
1310 pmu->pmu.task_ctx_nr = perf_invalid_context;
1311 pmu->pmu.add = imc_event_add;
1312 pmu->pmu.del = imc_event_stop;
1313 pmu->pmu.start = imc_event_start;
1314 pmu->pmu.stop = imc_event_stop;
1315 pmu->pmu.read = imc_event_update;
1316 pmu->pmu.attr_groups = pmu->attr_groups;
c2c9091d 1317 pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
885dcd70
AS
1318 pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
1319
885dcd70
AS
1320 switch (pmu->domain) {
1321 case IMC_DOMAIN_NEST:
1322 pmu->pmu.event_init = nest_imc_event_init;
1323 pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
1324 break;
39a846db
AS
1325 case IMC_DOMAIN_CORE:
1326 pmu->pmu.event_init = core_imc_event_init;
1327 pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
1328 break;
f74c89bd
AS
1329 case IMC_DOMAIN_THREAD:
1330 pmu->pmu.event_init = thread_imc_event_init;
f74c89bd
AS
1331 pmu->pmu.add = thread_imc_event_add;
1332 pmu->pmu.del = thread_imc_event_del;
1333 pmu->pmu.start_txn = thread_imc_pmu_start_txn;
1334 pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn;
1335 pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
1336 break;
012ae244
AS
1337 case IMC_DOMAIN_TRACE:
1338 pmu->pmu.event_init = trace_imc_event_init;
1339 pmu->pmu.add = trace_imc_event_add;
1340 pmu->pmu.del = trace_imc_event_del;
1341 pmu->pmu.start = trace_imc_event_start;
1342 pmu->pmu.stop = trace_imc_event_stop;
1343 pmu->pmu.read = trace_imc_event_read;
1344 pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group;
885dcd70
AS
1345 default:
1346 break;
1347 }
1348
1349 return 0;
1350}
1351
1352/* init_nest_pmu_ref: Initialize the imc_pmu_ref struct for all the nodes */
1353static int init_nest_pmu_ref(void)
1354{
1355 int nid, i, cpu;
1356
1357 nest_imc_refc = kcalloc(num_possible_nodes(), sizeof(*nest_imc_refc),
1358 GFP_KERNEL);
1359
1360 if (!nest_imc_refc)
1361 return -ENOMEM;
1362
1363 i = 0;
1364 for_each_node(nid) {
1365 /*
1366 * Mutex lock to avoid races while tracking the number of
1367 * sessions using the chip's nest pmu units.
1368 */
1369 mutex_init(&nest_imc_refc[i].lock);
1370
1371 /*
1372 * Loop to init the "id" with the node_id. Variable "i" initialized to
1373 * 0 and will be used as index to the array. "i" will not go off the
1374 * end of the array since the "for_each_node" loops for "N_POSSIBLE"
1375 * nodes only.
1376 */
1377 nest_imc_refc[i++].id = nid;
1378 }
1379
1380 /*
1381 * Loop to init the per_cpu "local_nest_imc_refc" with the proper
1382 * "nest_imc_refc" index. This makes get_nest_pmu_ref() alot simple.
1383 */
1384 for_each_possible_cpu(cpu) {
1385 nid = cpu_to_node(cpu);
7efbae90 1386 for (i = 0; i < num_possible_nodes(); i++) {
885dcd70
AS
1387 if (nest_imc_refc[i].id == nid) {
1388 per_cpu(local_nest_imc_refc, cpu) = &nest_imc_refc[i];
1389 break;
1390 }
1391 }
1392 }
1393 return 0;
1394}
1395
39a846db
AS
1396static void cleanup_all_core_imc_memory(void)
1397{
d2032678 1398 int i, nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
39a846db
AS
1399 struct imc_mem_info *ptr = core_imc_pmu->mem_info;
1400 int size = core_imc_pmu->counter_mem_size;
1401
1402 /* mem_info will never be NULL */
1403 for (i = 0; i < nr_cores; i++) {
1404 if (ptr[i].vbase)
cb094fa5 1405 free_pages((u64)ptr[i].vbase, get_order(size));
39a846db
AS
1406 }
1407
1408 kfree(ptr);
1409 kfree(core_imc_refc);
1410}
1411
f74c89bd
AS
1412static void thread_imc_ldbar_disable(void *dummy)
1413{
1414 /*
1415 * By Zeroing LDBAR, we disable thread-imc
1416 * updates.
1417 */
1418 mtspr(SPRN_LDBAR, 0);
1419}
1420
1421void thread_imc_disable(void)
1422{
1423 on_each_cpu(thread_imc_ldbar_disable, NULL, 1);
1424}
1425
1426static void cleanup_all_thread_imc_memory(void)
1427{
1428 int i, order = get_order(thread_imc_mem_size);
1429
1430 for_each_online_cpu(i) {
1431 if (per_cpu(thread_imc_mem, i))
1432 free_pages((u64)per_cpu(thread_imc_mem, i), order);
1433
1434 }
1435}
1436
72c69dcd
AS
1437static void cleanup_all_trace_imc_memory(void)
1438{
1439 int i, order = get_order(trace_imc_mem_size);
1440
1441 for_each_online_cpu(i) {
1442 if (per_cpu(trace_imc_mem, i))
1443 free_pages((u64)per_cpu(trace_imc_mem, i), order);
1444
1445 }
1446 kfree(trace_imc_refc);
1447}
1448
ed8e443f
AS
1449/* Function to free the attr_groups which are dynamically allocated */
1450static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
1451{
1452 if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
1453 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
1454 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
ed8e443f
AS
1455}
1456
885dcd70
AS
1457/*
1458 * Common function to unregister cpu hotplug callback and
1459 * free the memory.
1460 * TODO: Need to handle pmu unregistering, which will be
1461 * done in followup series.
1462 */
1463static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
1464{
1465 if (pmu_ptr->domain == IMC_DOMAIN_NEST) {
b3376dcc 1466 mutex_lock(&nest_init_lock);
885dcd70
AS
1467 if (nest_pmus == 1) {
1468 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
1469 kfree(nest_imc_refc);
110df8bd 1470 kfree(per_nest_pmu_arr);
cb094fa5 1471 per_nest_pmu_arr = NULL;
885dcd70
AS
1472 }
1473
1474 if (nest_pmus > 0)
1475 nest_pmus--;
1476 mutex_unlock(&nest_init_lock);
1477 }
1478
39a846db
AS
1479 /* Free core_imc memory */
1480 if (pmu_ptr->domain == IMC_DOMAIN_CORE) {
1481 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE);
1482 cleanup_all_core_imc_memory();
1483 }
1484
f74c89bd
AS
1485 /* Free thread_imc memory */
1486 if (pmu_ptr->domain == IMC_DOMAIN_THREAD) {
1487 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
1488 cleanup_all_thread_imc_memory();
1489 }
72c69dcd
AS
1490
1491 if (pmu_ptr->domain == IMC_DOMAIN_TRACE) {
1492 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE);
1493 cleanup_all_trace_imc_memory();
1494 }
885dcd70
AS
1495}
1496
25af86b2
AS
1497/*
1498 * Function to unregister thread-imc if core-imc
1499 * is not registered.
1500 */
1501void unregister_thread_imc(void)
1502{
1503 imc_common_cpuhp_mem_free(thread_imc_pmu);
1504 imc_common_mem_free(thread_imc_pmu);
1505 perf_pmu_unregister(&thread_imc_pmu->pmu);
1506}
885dcd70
AS
1507
1508/*
1509 * imc_mem_init : Function to support memory allocation for core imc.
1510 */
1511static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
1512 int pmu_index)
1513{
1514 const char *s;
b41bb28b 1515 int nr_cores, cpu, res = -ENOMEM;
885dcd70
AS
1516
1517 if (of_property_read_string(parent, "name", &s))
1518 return -ENODEV;
1519
885dcd70
AS
1520 switch (pmu_ptr->domain) {
1521 case IMC_DOMAIN_NEST:
1522 /* Update the pmu name */
1523 pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s_imc", "nest_", s);
1524 if (!pmu_ptr->pmu.name)
b41bb28b 1525 goto err;
885dcd70
AS
1526
1527 /* Needed for hotplug/migration */
73ce9aec
MS
1528 if (!per_nest_pmu_arr) {
1529 per_nest_pmu_arr = kcalloc(get_max_nest_dev() + 1,
1530 sizeof(struct imc_pmu *),
1531 GFP_KERNEL);
1532 if (!per_nest_pmu_arr)
b41bb28b 1533 goto err;
73ce9aec 1534 }
885dcd70
AS
1535 per_nest_pmu_arr[pmu_index] = pmu_ptr;
1536 break;
39a846db
AS
1537 case IMC_DOMAIN_CORE:
1538 /* Update the pmu name */
1539 pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
1540 if (!pmu_ptr->pmu.name)
b41bb28b 1541 goto err;
39a846db 1542
d2032678 1543 nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
39a846db
AS
1544 pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info),
1545 GFP_KERNEL);
1546
1547 if (!pmu_ptr->mem_info)
b41bb28b 1548 goto err;
39a846db
AS
1549
1550 core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
1551 GFP_KERNEL);
1552
ed8e443f
AS
1553 if (!core_imc_refc) {
1554 kfree(pmu_ptr->mem_info);
b41bb28b 1555 goto err;
ed8e443f 1556 }
39a846db
AS
1557
1558 core_imc_pmu = pmu_ptr;
1559 break;
f74c89bd
AS
1560 case IMC_DOMAIN_THREAD:
1561 /* Update the pmu name */
1562 pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
1563 if (!pmu_ptr->pmu.name)
b41bb28b 1564 goto err;
f74c89bd
AS
1565
1566 thread_imc_mem_size = pmu_ptr->counter_mem_size;
1567 for_each_online_cpu(cpu) {
1568 res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size);
ed8e443f
AS
1569 if (res) {
1570 cleanup_all_thread_imc_memory();
b41bb28b 1571 goto err;
ed8e443f 1572 }
f74c89bd
AS
1573 }
1574
25af86b2 1575 thread_imc_pmu = pmu_ptr;
f74c89bd 1576 break;
72c69dcd
AS
1577 case IMC_DOMAIN_TRACE:
1578 /* Update the pmu name */
1579 pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
1580 if (!pmu_ptr->pmu.name)
1581 return -ENOMEM;
1582
1583 nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
1584 trace_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
1585 GFP_KERNEL);
1586 if (!trace_imc_refc)
1587 return -ENOMEM;
1588
1589 trace_imc_mem_size = pmu_ptr->counter_mem_size;
1590 for_each_online_cpu(cpu) {
1591 res = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
1592 if (res) {
1593 cleanup_all_trace_imc_memory();
1594 goto err;
1595 }
1596 }
1597 break;
885dcd70
AS
1598 default:
1599 return -EINVAL;
1600 }
1601
1602 return 0;
b41bb28b
AS
1603err:
1604 return res;
885dcd70
AS
1605}
1606
1607/*
1608 * init_imc_pmu : Setup and register the IMC pmu device.
1609 *
1610 * @parent: Device tree unit node
1611 * @pmu_ptr: memory allocated for this pmu
1612 * @pmu_idx: Count of nest pmc registered
1613 *
1614 * init_imc_pmu() setup pmu cpumask and registers for a cpu hotplug callback.
1615 * Handles failure cases and accordingly frees memory.
1616 */
1617int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_idx)
1618{
1619 int ret;
1620
1621 ret = imc_mem_init(pmu_ptr, parent, pmu_idx);
cb094fa5
AS
1622 if (ret)
1623 goto err_free_mem;
885dcd70 1624
885dcd70
AS
1625 switch (pmu_ptr->domain) {
1626 case IMC_DOMAIN_NEST:
1627 /*
1628 * Nest imc pmu need only one cpu per chip, we initialize the
1629 * cpumask for the first nest imc pmu and use the same for the
1630 * rest. To handle the cpuhotplug callback unregister, we track
1631 * the number of nest pmus in "nest_pmus".
1632 */
1633 mutex_lock(&nest_init_lock);
1634 if (nest_pmus == 0) {
1635 ret = init_nest_pmu_ref();
1636 if (ret) {
1637 mutex_unlock(&nest_init_lock);
cb094fa5
AS
1638 kfree(per_nest_pmu_arr);
1639 per_nest_pmu_arr = NULL;
1640 goto err_free_mem;
885dcd70
AS
1641 }
1642 /* Register for cpu hotplug notification. */
1643 ret = nest_pmu_cpumask_init();
1644 if (ret) {
1645 mutex_unlock(&nest_init_lock);
110df8bd
AS
1646 kfree(nest_imc_refc);
1647 kfree(per_nest_pmu_arr);
cb094fa5
AS
1648 per_nest_pmu_arr = NULL;
1649 goto err_free_mem;
885dcd70
AS
1650 }
1651 }
1652 nest_pmus++;
1653 mutex_unlock(&nest_init_lock);
39a846db
AS
1654 break;
1655 case IMC_DOMAIN_CORE:
1656 ret = core_imc_pmu_cpumask_init();
1657 if (ret) {
1658 cleanup_all_core_imc_memory();
cb094fa5 1659 goto err_free_mem;
39a846db
AS
1660 }
1661
f74c89bd
AS
1662 break;
1663 case IMC_DOMAIN_THREAD:
1664 ret = thread_imc_cpu_init();
1665 if (ret) {
1666 cleanup_all_thread_imc_memory();
cb094fa5 1667 goto err_free_mem;
f74c89bd
AS
1668 }
1669
72c69dcd
AS
1670 break;
1671 case IMC_DOMAIN_TRACE:
1672 ret = trace_imc_cpu_init();
1673 if (ret) {
1674 cleanup_all_trace_imc_memory();
1675 goto err_free_mem;
1676 }
1677
885dcd70
AS
1678 break;
1679 default:
e7a8ac43 1680 return -EINVAL; /* Unknown domain */
885dcd70
AS
1681 }
1682
1683 ret = update_events_in_group(parent, pmu_ptr);
1684 if (ret)
cb094fa5 1685 goto err_free_cpuhp_mem;
885dcd70
AS
1686
1687 ret = update_pmu_ops(pmu_ptr);
1688 if (ret)
cb094fa5 1689 goto err_free_cpuhp_mem;
885dcd70
AS
1690
1691 ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1);
1692 if (ret)
cb094fa5 1693 goto err_free_cpuhp_mem;
885dcd70 1694
6233b6da 1695 pr_debug("%s performance monitor hardware support registered\n",
885dcd70
AS
1696 pmu_ptr->pmu.name);
1697
1698 return 0;
1699
cb094fa5 1700err_free_cpuhp_mem:
885dcd70 1701 imc_common_cpuhp_mem_free(pmu_ptr);
cb094fa5
AS
1702err_free_mem:
1703 imc_common_mem_free(pmu_ptr);
885dcd70
AS
1704 return ret;
1705}