1 #include "perf_event_intel_uncore.h"
3 static struct intel_uncore_type
*empty_uncore
[] = { NULL
, };
4 struct intel_uncore_type
**uncore_msr_uncores
= empty_uncore
;
5 struct intel_uncore_type
**uncore_pci_uncores
= empty_uncore
;
7 static bool pcidrv_registered
;
8 struct pci_driver
*uncore_pci_driver
;
9 /* pci bus to socket mapping */
10 DEFINE_RAW_SPINLOCK(pci2phy_map_lock
);
11 struct list_head pci2phy_map_head
= LIST_HEAD_INIT(pci2phy_map_head
);
12 struct pci_dev
*uncore_extra_pci_dev
[UNCORE_SOCKET_MAX
][UNCORE_EXTRA_PCI_DEV_MAX
];
14 static DEFINE_RAW_SPINLOCK(uncore_box_lock
);
15 /* mask of cpus that collect uncore events */
16 static cpumask_t uncore_cpu_mask
;
18 /* constraint for the fixed counter */
19 static struct event_constraint uncore_constraint_fixed
=
20 EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED
, ~0ULL);
21 struct event_constraint uncore_constraint_empty
=
22 EVENT_CONSTRAINT(0, 0, 0);
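/*
 * Note: uncore_constraint_fixed pins fixed-function events onto the dedicated
 * fixed counter slot, while uncore_constraint_empty is what
 * uncore_get_constraint() below hands back when an event's shared extra
 * register cannot be granted, so that event cannot be scheduled.
 */
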
int uncore_pcibus_to_physid(struct pci_bus *bus)
{
	struct pci2phy_map *map;
	int phys_id = -1;

	raw_spin_lock(&pci2phy_map_lock);
	list_for_each_entry(map, &pci2phy_map_head, list) {
		if (map->segment == pci_domain_nr(bus)) {
			phys_id = map->pbus_to_physid[bus->number];
			break;
		}
	}
	raw_spin_unlock(&pci2phy_map_lock);

	return phys_id;
}

struct pci2phy_map *__find_pci2phy_map(int segment)
{
	struct pci2phy_map *map, *alloc = NULL;
	int i;

	lockdep_assert_held(&pci2phy_map_lock);

lookup:
	list_for_each_entry(map, &pci2phy_map_head, list) {
		if (map->segment == segment)
			goto end;
	}

	if (!alloc) {
		raw_spin_unlock(&pci2phy_map_lock);
		alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
		raw_spin_lock(&pci2phy_map_lock);

		if (!alloc)
			return NULL;

		goto lookup;
	}

	map = alloc;
	alloc = NULL;
	map->segment = segment;
	for (i = 0; i < 256; i++)
		map->pbus_to_physid[i] = -1;
	list_add_tail(&map->list, &pci2phy_map_head);

end:
	kfree(alloc);
	return map;
}

ssize_t uncore_event_show(struct kobject *kobj,
			  struct kobj_attribute *attr, char *buf)
{
	struct uncore_event_desc *event =
		container_of(attr, struct uncore_event_desc, attr);
	return sprintf(buf, "%s", event->config);
}

struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct intel_uncore_pmu, pmu);
}

struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
	struct intel_uncore_box *box;

	box = *per_cpu_ptr(pmu->box, cpu);
	if (box)
		return box;

	raw_spin_lock(&uncore_box_lock);
	/* Recheck in lock to handle races. */
	if (*per_cpu_ptr(pmu->box, cpu))
		goto out;
	list_for_each_entry(box, &pmu->box_list, list) {
		if (box->phys_id == topology_physical_package_id(cpu)) {
			atomic_inc(&box->refcnt);
			*per_cpu_ptr(pmu->box, cpu) = box;
			break;
		}
	}
out:
	raw_spin_unlock(&uncore_box_lock);

	return *per_cpu_ptr(pmu->box, cpu);
}

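/*
 * The per-cpu box pointer acts as a cache: the first lookup on a CPU walks
 * pmu->box_list for a box belonging to the same physical package, takes a
 * reference and caches it, so subsequent lookups are a single per-cpu load.
 */
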
struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
	/*
	 * perf core schedules events on the basis of cpu; uncore events are
	 * collected by one of the cpus inside a physical package.
	 */
	return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
}

u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 count;

	rdmsrl(event->hw.event_base, count);

	return count;
}

/*
 * generic get constraint function for shared match/mask registers.
 */
struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
	unsigned long flags;
	bool ok = false;

	/*
	 * reg->alloc can be set due to existing state, so for fake box we
	 * need to ignore this, otherwise we might fail to allocate proper
	 * fake state for this extra reg constraint.
	 */
	if (reg1->idx == EXTRA_REG_NONE ||
	    (!uncore_box_is_fake(box) && reg1->alloc))
		return NULL;

	er = &box->shared_regs[reg1->idx];
	raw_spin_lock_irqsave(&er->lock, flags);
	if (!atomic_read(&er->ref) ||
	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
		atomic_inc(&er->ref);
		er->config1 = reg1->config;
		er->config2 = reg2->config;
		ok = true;
	}
	raw_spin_unlock_irqrestore(&er->lock, flags);

	if (ok) {
		if (!uncore_box_is_fake(box))
			reg1->alloc = 1;

		return NULL;
	}

	return &uncore_constraint_empty;
}

void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;

	/*
	 * Only put constraint if extra reg was actually allocated. Also
	 * takes care of events which do not use an extra shared reg.
	 *
	 * Also, if this is a fake box we shouldn't touch any event state
	 * (reg->alloc) and we don't care about leaving inconsistent box
	 * state either since it will be thrown out.
	 */
	if (uncore_box_is_fake(box) || !reg1->alloc)
		return;

	er = &box->shared_regs[reg1->idx];
	atomic_dec(&er->ref);
	reg1->alloc = 0;
}

u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
{
	struct intel_uncore_extra_reg *er;
	unsigned long flags;
	u64 config;

	er = &box->shared_regs[idx];

	raw_spin_lock_irqsave(&er->lock, flags);
	config = er->config;
	raw_spin_unlock_irqrestore(&er->lock, flags);

	return config;
}

static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = idx;
	hwc->last_tag = ++box->tags[idx];

	if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
		hwc->event_base = uncore_fixed_ctr(box);
		hwc->config_base = uncore_fixed_ctl(box);
		return;
	}

	hwc->config_base = uncore_event_ctl(box, hwc->idx);
	hwc->event_base = uncore_perf_ctr(box, hwc->idx);
}

void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 prev_count, new_count, delta;
	int shift;

	if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
		shift = 64 - uncore_fixed_ctr_bits(box);
	else
		shift = 64 - uncore_perf_ctr_bits(box);

	/* the hrtimer might modify the previous event value */
again:
	prev_count = local64_read(&event->hw.prev_count);
	new_count = uncore_read_counter(box, event);
	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
		goto again;

	delta = (new_count << shift) - (prev_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
}

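/*
 * The paired left shifts above place the counter's top bit at bit 63 before
 * subtracting, and the final right shift scales the result back, so the delta
 * stays correct even when a counter narrower than 64 bits wraps between reads.
 */
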
/*
 * The overflow interrupt is unavailable for SandyBridge-EP, is broken
 * for SandyBridge. So we use hrtimer to periodically poll the counter
 * to avoid overflow.
 */
static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
	struct intel_uncore_box *box;
	struct perf_event *event;
	unsigned long flags;
	int bit;

	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
	if (!box->n_active || box->cpu != smp_processor_id())
		return HRTIMER_NORESTART;
	/*
	 * disable local interrupts to prevent uncore_pmu_event_start/stop
	 * from interrupting the update process
	 */
	local_irq_save(flags);

	/*
	 * handle boxes with an active event list as opposed to active
	 * counters
	 */
	list_for_each_entry(event, &box->active_list, active_entry) {
		uncore_perf_event_update(box, event);
	}

	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
		uncore_perf_event_update(box, box->events[bit]);

	local_irq_restore(flags);

	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
	return HRTIMER_RESTART;
}

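/*
 * Every box is polled once per box->hrtimer_duration nanoseconds (the default
 * is UNCORE_PMU_HRTIMER_INTERVAL, set in uncore_alloc_box() below), which
 * refreshes the software copy of each counter long before the hardware value
 * can wrap.
 */
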
void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
		      HRTIMER_MODE_REL_PINNED);
}

void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_cancel(&box->hrtimer);
}

static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	box->hrtimer.function = uncore_pmu_hrtimer;
}

static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
{
	struct intel_uncore_box *box;
	int i, size;

	size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);

	box = kzalloc_node(size, GFP_KERNEL, node);
	if (!box)
		return NULL;

	for (i = 0; i < type->num_shared_regs; i++)
		raw_spin_lock_init(&box->shared_regs[i].lock);

	uncore_pmu_init_hrtimer(box);
	atomic_set(&box->refcnt, 1);
	box->cpu = -1;
	box->phys_id = -1;

	/* set default hrtimer timeout */
	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;

	INIT_LIST_HEAD(&box->active_list);

	return box;
}

/*
 * Using uncore_pmu_event_init pmu event_init callback
 * as a detection point for uncore events.
 */
static int uncore_pmu_event_init(struct perf_event *event);

static bool is_uncore_event(struct perf_event *event)
{
	return event->pmu->event_init == uncore_pmu_event_init;
}

static int
uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
{
	struct perf_event *event;
	int n, max_count;

	max_count = box->pmu->type->num_counters;
	if (box->pmu->type->fixed_ctl)
		max_count++;

	if (box->n_events >= max_count)
		return -EINVAL;

	n = box->n_events;

	if (is_uncore_event(leader)) {
		box->event_list[n] = leader;
		n++;
	}

	if (!dogrp)
		return n;

	list_for_each_entry(event, &leader->sibling_list, group_entry) {
		if (!is_uncore_event(event) ||
		    event->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (n >= max_count)
			return -EINVAL;

		box->event_list[n] = event;
		n++;
	}
	return n;
}

static struct event_constraint *
uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_type *type = box->pmu->type;
	struct event_constraint *c;

	if (type->ops->get_constraint) {
		c = type->ops->get_constraint(box, event);
		if (c)
			return c;
	}

	if (event->attr.config == UNCORE_FIXED_EVENT)
		return &uncore_constraint_fixed;

	if (type->constraints) {
		for_each_event_constraint(c, type->constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &type->unconstrainted;
}

static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	if (box->pmu->type->ops->put_constraint)
		box->pmu->type->ops->put_constraint(box, event);
}

static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
	struct event_constraint *c;
	int i, wmin, wmax, ret = 0;
	struct hw_perf_event *hwc;

	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
		c = uncore_get_event_constraint(box, box->event_list[i]);
		box->event_constraint[i] = c;
		wmin = min(wmin, c->weight);
		wmax = max(wmax, c->weight);
	}

	/* fastpath, try to reuse previous register */
	for (i = 0; i < n; i++) {
		hwc = &box->event_list[i]->hw;
		c = box->event_constraint[i];

		/* never assigned */
		if (hwc->idx == -1)
			break;

		/* constraint still honored */
		if (!test_bit(hwc->idx, c->idxmsk))
			break;

		/* not already used */
		if (test_bit(hwc->idx, used_mask))
			break;

		__set_bit(hwc->idx, used_mask);
		if (assign)
			assign[i] = hwc->idx;
	}
	/* slow path */
	if (i != n)
		ret = perf_assign_events(box->event_constraint, n,
					 wmin, wmax, n, assign);

	if (!assign || ret) {
		for (i = 0; i < n; i++)
			uncore_put_event_constraint(box, box->event_list[i]);
	}
	return ret ? -EINVAL : 0;
}

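/*
 * This mirrors the core x86 PMU scheduler: keep existing counter assignments
 * while every constraint still allows them, and only fall back to the generic
 * perf_assign_events() solver when some event has to move.
 */
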
static void uncore_pmu_event_start(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int idx = event->hw.idx;

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
		return;

	event->hw.state = 0;
	box->events[idx] = event;
	box->n_active++;
	__set_bit(idx, box->active_mask);

	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
	uncore_enable_event(box, event);

	if (box->n_active == 1) {
		uncore_enable_box(box);
		uncore_pmu_start_hrtimer(box);
	}
}

static void uncore_pmu_event_stop(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;

	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
		uncore_disable_event(box, event);
		box->n_active--;
		box->events[hwc->idx] = NULL;
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;

		if (box->n_active == 0) {
			uncore_disable_box(box);
			uncore_pmu_cancel_hrtimer(box);
		}
	}

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of an event
		 * that we are disabling:
		 */
		uncore_perf_event_update(box, event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

static int uncore_pmu_event_add(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;
	int assign[UNCORE_PMC_IDX_MAX];
	int i, n, ret;

	if (!box)
		return -ENODEV;

	ret = n = uncore_collect_events(box, event, false);
	if (ret < 0)
		return ret;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	ret = uncore_assign_events(box, assign, n);
	if (ret)
		return ret;

	/* save events moving to new counters */
	for (i = 0; i < box->n_events; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		if (hwc->idx == assign[i] &&
		    hwc->last_tag == box->tags[assign[i]])
			continue;
		/*
		 * Ensure we don't accidentally enable a stopped
		 * counter simply because we rescheduled.
		 */
		if (hwc->state & PERF_HES_STOPPED)
			hwc->state |= PERF_HES_ARCH;

		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
	}

	/* reprogram moved events into new counters */
	for (i = 0; i < n; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		if (hwc->idx != assign[i] ||
		    hwc->last_tag != box->tags[assign[i]])
			uncore_assign_hw_event(box, event, assign[i]);
		else if (i < box->n_events)
			continue;

		if (hwc->state & PERF_HES_ARCH)
			continue;

		uncore_pmu_event_start(event, 0);
	}
	box->n_events = n;

	return 0;
}

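/*
 * PERF_HES_ARCH doubles as a "leave it stopped" marker here: events added
 * without PERF_EF_START, or already stopped before the reschedule above, keep
 * the flag and are skipped when counters are restarted.
 */
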
static void uncore_pmu_event_del(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int i;

	uncore_pmu_event_stop(event, PERF_EF_UPDATE);

	for (i = 0; i < box->n_events; i++) {
		if (event == box->event_list[i]) {
			uncore_put_event_constraint(box, event);

			while (++i < box->n_events)
				box->event_list[i - 1] = box->event_list[i];

			--box->n_events;
			break;
		}
	}

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
}

void uncore_pmu_event_read(struct perf_event *event)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	uncore_perf_event_update(box, event);
}

/*
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int uncore_validate_group(struct intel_uncore_pmu *pmu,
				 struct perf_event *event)
{
	struct perf_event *leader = event->group_leader;
	struct intel_uncore_box *fake_box;
	int ret = -EINVAL, n;

	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
	if (!fake_box)
		return -ENOMEM;

	fake_box->pmu = pmu;
	/*
	 * the event is not yet connected with its
	 * siblings therefore we must first collect
	 * existing siblings, then add the new event
	 * before we can simulate the scheduling
	 */
	n = uncore_collect_events(fake_box, leader, true);
	if (n < 0)
		goto out;

	fake_box->n_events = n;
	n = uncore_collect_events(fake_box, event, false);
	if (n < 0)
		goto out;

	fake_box->n_events = n;

	ret = uncore_assign_events(fake_box, NULL, n);
out:
	kfree(fake_box);
	return ret;
}

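/*
 * The fake box is never programmed: uncore_assign_events() is called with a
 * NULL assign array, so only the constraint/scheduling logic runs and the
 * hardware is left untouched during group validation.
 */
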
static int uncore_pmu_event_init(struct perf_event *event)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	struct hw_perf_event *hwc = &event->hw;
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	pmu = uncore_event_to_pmu(event);
	/* no device found for this pmu */
	if (pmu->func_id < 0)
		return -ENOENT;

	/*
	 * The uncore PMU measures at all privilege levels all the time,
	 * so it doesn't make sense to specify any exclude bits.
	 */
	if (event->attr.exclude_user || event->attr.exclude_kernel ||
	    event->attr.exclude_hv || event->attr.exclude_idle)
		return -EINVAL;

	/* Sampling not supported yet */
	if (hwc->sample_period)
		return -EINVAL;

	/*
	 * Place all uncore events for a particular physical package
	 * onto a single cpu
	 */
	if (event->cpu < 0)
		return -EINVAL;
	box = uncore_pmu_to_box(pmu, event->cpu);
	if (!box || box->cpu < 0)
		return -EINVAL;
	event->cpu = box->cpu;

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
	event->hw.extra_reg.idx = EXTRA_REG_NONE;
	event->hw.branch_reg.idx = EXTRA_REG_NONE;

	if (event->attr.config == UNCORE_FIXED_EVENT) {
		/* no fixed counter */
		if (!pmu->type->fixed_ctl)
			return -EINVAL;
		/*
		 * if there is only one fixed counter, only the first pmu
		 * can access the fixed counter
		 */
		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
			return -EINVAL;

		/* fixed counters have event field hardcoded to zero */
		hwc->config = ~0ULL;
	} else {
		hwc->config = event->attr.config & pmu->type->event_mask;
		if (pmu->type->ops->hw_config) {
			ret = pmu->type->ops->hw_config(box, event);
			if (ret)
				return ret;
		}
	}

	if (event->group_leader != event)
		ret = uncore_validate_group(pmu, event);
	else
		ret = 0;

	return ret;
}

static ssize_t uncore_get_attr_cpumask(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
}

static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);

static struct attribute *uncore_pmu_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group uncore_pmu_attr_group = {
	.attrs = uncore_pmu_attrs,
};

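/*
 * The "cpumask" attribute is how tools discover which CPU uncore events are
 * collected on, e.g. (illustrative; the exact sysfs path and PMU names vary
 * by platform):
 *   $ cat /sys/bus/event_source/devices/uncore_cbox_0/cpumask
 * perf reads this file and opens uncore events on the listed CPU.
 */
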
static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
	int ret;

	if (!pmu->type->pmu) {
		pmu->pmu = (struct pmu) {
			.attr_groups	= pmu->type->attr_groups,
			.task_ctx_nr	= perf_invalid_context,
			.event_init	= uncore_pmu_event_init,
			.add		= uncore_pmu_event_add,
			.del		= uncore_pmu_event_del,
			.start		= uncore_pmu_event_start,
			.stop		= uncore_pmu_event_stop,
			.read		= uncore_pmu_event_read,
		};
	} else {
		pmu->pmu = *pmu->type->pmu;
		pmu->pmu.attr_groups = pmu->type->attr_groups;
	}

	if (pmu->type->num_boxes == 1) {
		if (strlen(pmu->type->name) > 0)
			sprintf(pmu->name, "uncore_%s", pmu->type->name);
		else
			sprintf(pmu->name, "uncore");
	} else {
		sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
			pmu->pmu_idx);
	}

	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
	return ret;
}

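/*
 * Resulting PMU names: "uncore" for an anonymous single-box type,
 * "uncore_<type>" for a named single-box type, and "uncore_<type>_<N>" when a
 * type has several boxes.
 */
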
static void __init uncore_type_exit(struct intel_uncore_type *type)
{
	int i;

	for (i = 0; i < type->num_boxes; i++)
		free_percpu(type->pmus[i].box);
	kfree(type->pmus);
	type->pmus = NULL;
	kfree(type->events_group);
	type->events_group = NULL;
}

static void __init uncore_types_exit(struct intel_uncore_type **types)
{
	int i;
	for (i = 0; types[i]; i++)
		uncore_type_exit(types[i]);
}

static int __init uncore_type_init(struct intel_uncore_type *type)
{
	struct intel_uncore_pmu *pmus;
	struct attribute_group *attr_group;
	struct attribute **attrs;
	int i, j;

	pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
	if (!pmus)
		return -ENOMEM;

	type->pmus = pmus;

	type->unconstrainted = (struct event_constraint)
		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
				   0, type->num_counters, 0, 0);

	for (i = 0; i < type->num_boxes; i++) {
		pmus[i].func_id = -1;
		pmus[i].pmu_idx = i;
		pmus[i].type = type;
		INIT_LIST_HEAD(&pmus[i].box_list);
		pmus[i].box = alloc_percpu(struct intel_uncore_box *);
		if (!pmus[i].box)
			goto fail;
	}

	if (type->event_descs) {
		i = 0;
		while (type->event_descs[i].attr.attr.name)
			i++;

		attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
				     sizeof(*attr_group), GFP_KERNEL);
		if (!attr_group)
			goto fail;

		attrs = (struct attribute **)(attr_group + 1);
		attr_group->name = "events";
		attr_group->attrs = attrs;

		for (j = 0; j < i; j++)
			attrs[j] = &type->event_descs[j].attr.attr;

		type->events_group = attr_group;
	}

	type->pmu_group = &uncore_pmu_attr_group;
	return 0;
fail:
	uncore_type_exit(type);
	return -ENOMEM;
}

static int __init uncore_types_init(struct intel_uncore_type **types)
{
	int i, ret;

	for (i = 0; types[i]; i++) {
		ret = uncore_type_init(types[i]);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	while (--i >= 0)
		uncore_type_exit(types[i]);
	return ret;
}

/*
 * add a pci uncore device
 */
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	struct intel_uncore_type *type;
	int phys_id;
	bool first_box = false;

	phys_id = uncore_pcibus_to_physid(pdev->bus);
	if (phys_id < 0)
		return -ENODEV;

	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
		uncore_extra_pci_dev[phys_id][idx] = pdev;
		pci_set_drvdata(pdev, NULL);
		return 0;
	}

	type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
	box = uncore_alloc_box(type, NUMA_NO_NODE);
	if (!box)
		return -ENOMEM;

	/*
	 * for performance monitoring unit with multiple boxes,
	 * each box has a different function id.
	 */
	pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
	/*
	 * Knights Landing uses a common PCI device ID for multiple instances of
	 * an uncore PMU device type. There is only one entry per device type in
	 * the knl_uncore_pci_ids table in spite of multiple devices present for
	 * some device types. Hence PCI device idx would be 0 for all devices.
	 * So increment pmu pointer to point to an unused array element.
	 */
	if (boot_cpu_data.x86_model == 87)
		while (pmu->func_id >= 0)
			pmu++;
	if (pmu->func_id < 0)
		pmu->func_id = pdev->devfn;
	else
		WARN_ON_ONCE(pmu->func_id != pdev->devfn);

	box->phys_id = phys_id;
	box->pci_dev = pdev;
	box->pmu = pmu;
	uncore_box_init(box);
	pci_set_drvdata(pdev, box);

	raw_spin_lock(&uncore_box_lock);
	if (list_empty(&pmu->box_list))
		first_box = true;
	list_add_tail(&box->list, &pmu->box_list);
	raw_spin_unlock(&uncore_box_lock);

	if (first_box)
		uncore_pmu_register(pmu);
	return 0;
}

static void uncore_pci_remove(struct pci_dev *pdev)
{
	struct intel_uncore_box *box = pci_get_drvdata(pdev);
	struct intel_uncore_pmu *pmu;
	int i, cpu, phys_id;
	bool last_box = false;

	phys_id = uncore_pcibus_to_physid(pdev->bus);
	box = pci_get_drvdata(pdev);
	if (!box) {
		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
			if (uncore_extra_pci_dev[phys_id][i] == pdev) {
				uncore_extra_pci_dev[phys_id][i] = NULL;
				break;
			}
		}
		WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
		return;
	}

	pmu = box->pmu;
	if (WARN_ON_ONCE(phys_id != box->phys_id))
		return;

	pci_set_drvdata(pdev, NULL);

	raw_spin_lock(&uncore_box_lock);
	list_del(&box->list);
	if (list_empty(&pmu->box_list))
		last_box = true;
	raw_spin_unlock(&uncore_box_lock);

	for_each_possible_cpu(cpu) {
		if (*per_cpu_ptr(pmu->box, cpu) == box) {
			*per_cpu_ptr(pmu->box, cpu) = NULL;
			atomic_dec(&box->refcnt);
		}
	}

	WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
	kfree(box);

	if (last_box)
		perf_pmu_unregister(&pmu->pmu);
}

static int __init uncore_pci_init(void)
{
	int ret;

	switch (boot_cpu_data.x86_model) {
	case 45: /* Sandy Bridge-EP */
		ret = snbep_uncore_pci_init();
		break;
	case 62: /* Ivy Bridge-EP */
		ret = ivbep_uncore_pci_init();
		break;
	case 63: /* Haswell-EP */
		ret = hswep_uncore_pci_init();
		break;
	case 79: /* BDX-EP */
	case 86: /* BDX-DE */
		ret = bdx_uncore_pci_init();
		break;
	case 42: /* Sandy Bridge */
		ret = snb_uncore_pci_init();
		break;
	case 58: /* Ivy Bridge */
		ret = ivb_uncore_pci_init();
		break;
	case 60: /* Haswell */
	case 69: /* Haswell Celeron */
		ret = hsw_uncore_pci_init();
		break;
	case 61: /* Broadwell */
		ret = bdw_uncore_pci_init();
		break;
	case 87: /* Knights Landing */
		ret = knl_uncore_pci_init();
		break;
	case 94: /* SkyLake */
		ret = skl_uncore_pci_init();
		break;
	default:
		return 0;
	}

	if (ret)
		return ret;

	ret = uncore_types_init(uncore_pci_uncores);
	if (ret)
		return ret;

	uncore_pci_driver->probe = uncore_pci_probe;
	uncore_pci_driver->remove = uncore_pci_remove;

	ret = pci_register_driver(uncore_pci_driver);
	if (ret == 0)
		pcidrv_registered = true;
	else
		uncore_types_exit(uncore_pci_uncores);

	return ret;
}

static void __init uncore_pci_exit(void)
{
	if (pcidrv_registered) {
		pcidrv_registered = false;
		pci_unregister_driver(uncore_pci_driver);
		uncore_types_exit(uncore_pci_uncores);
	}
}

/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */
static LIST_HEAD(boxes_to_free);

static void uncore_kfree_boxes(void)
{
	struct intel_uncore_box *box;

	while (!list_empty(&boxes_to_free)) {
		box = list_entry(boxes_to_free.next,
				 struct intel_uncore_box, list);
		list_del(&box->list);
		kfree(box);
	}
}

static void uncore_cpu_dying(int cpu)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i, j;

	for (i = 0; uncore_msr_uncores[i]; i++) {
		type = uncore_msr_uncores[i];
		for (j = 0; j < type->num_boxes; j++) {
			pmu = &type->pmus[j];
			box = *per_cpu_ptr(pmu->box, cpu);
			*per_cpu_ptr(pmu->box, cpu) = NULL;
			if (box && atomic_dec_and_test(&box->refcnt))
				list_add(&box->list, &boxes_to_free);
		}
	}
}

static int uncore_cpu_starting(int cpu)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box, *exist;
	int i, j, k, phys_id;

	phys_id = topology_physical_package_id(cpu);

	for (i = 0; uncore_msr_uncores[i]; i++) {
		type = uncore_msr_uncores[i];
		for (j = 0; j < type->num_boxes; j++) {
			pmu = &type->pmus[j];
			box = *per_cpu_ptr(pmu->box, cpu);
			/* called by uncore_cpu_init? */
			if (box && box->phys_id >= 0) {
				uncore_box_init(box);
				continue;
			}

			for_each_online_cpu(k) {
				exist = *per_cpu_ptr(pmu->box, k);
				if (exist && exist->phys_id == phys_id) {
					atomic_inc(&exist->refcnt);
					*per_cpu_ptr(pmu->box, cpu) = exist;
					if (box) {
						list_add(&box->list,
							 &boxes_to_free);
						box = NULL;
					}
					break;
				}
			}

			if (box) {
				box->phys_id = phys_id;
				uncore_box_init(box);
			}
		}
	}
	return 0;
}

);
1110 static int uncore_cpu_prepare(int cpu
, int phys_id
)
1112 struct intel_uncore_type
*type
;
1113 struct intel_uncore_pmu
*pmu
;
1114 struct intel_uncore_box
*box
;
1117 for (i
= 0; uncore_msr_uncores
[i
]; i
++) {
1118 type
= uncore_msr_uncores
[i
];
1119 for (j
= 0; j
< type
->num_boxes
; j
++) {
1120 pmu
= &type
->pmus
[j
];
1121 if (pmu
->func_id
< 0)
1124 box
= uncore_alloc_box(type
, cpu_to_node(cpu
));
1129 box
->phys_id
= phys_id
;
1130 *per_cpu_ptr(pmu
->box
, cpu
) = box
;
1137 uncore_change_context(struct intel_uncore_type
**uncores
, int old_cpu
, int new_cpu
)
1139 struct intel_uncore_type
*type
;
1140 struct intel_uncore_pmu
*pmu
;
1141 struct intel_uncore_box
*box
;
1144 for (i
= 0; uncores
[i
]; i
++) {
1146 for (j
= 0; j
< type
->num_boxes
; j
++) {
1147 pmu
= &type
->pmus
[j
];
1149 box
= uncore_pmu_to_box(pmu
, new_cpu
);
1151 box
= uncore_pmu_to_box(pmu
, old_cpu
);
1156 WARN_ON_ONCE(box
->cpu
!= -1);
1161 WARN_ON_ONCE(box
->cpu
!= old_cpu
);
1163 uncore_pmu_cancel_hrtimer(box
);
1164 perf_pmu_migrate_context(&pmu
->pmu
,
static void uncore_event_exit_cpu(int cpu)
{
	int i, phys_id, target;

	/* if exiting cpu is used for collecting uncore events */
	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
		return;

	/* find a new cpu to collect uncore events */
	phys_id = topology_physical_package_id(cpu);
	target = -1;
	for_each_online_cpu(i) {
		if (i == cpu)
			continue;
		if (phys_id == topology_physical_package_id(i)) {
			target = i;
			break;
		}
	}

	/* migrate uncore events to the new cpu */
	if (target >= 0)
		cpumask_set_cpu(target, &uncore_cpu_mask);

	uncore_change_context(uncore_msr_uncores, cpu, target);
	uncore_change_context(uncore_pci_uncores, cpu, target);
}

static void uncore_event_init_cpu(int cpu)
{
	int i, phys_id;

	phys_id = topology_physical_package_id(cpu);
	for_each_cpu(i, &uncore_cpu_mask) {
		if (phys_id == topology_physical_package_id(i))
			return;
	}

	cpumask_set_cpu(cpu, &uncore_cpu_mask);

	uncore_change_context(uncore_msr_uncores, -1, cpu);
	uncore_change_context(uncore_pci_uncores, -1, cpu);
}

static int uncore_cpu_notifier(struct notifier_block *self,
			       unsigned long action, void *hcpu)
{
	unsigned int cpu = (long)hcpu;

	/* allocate/free data structure for uncore box */
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		uncore_cpu_prepare(cpu, -1);
		break;
	case CPU_STARTING:
		uncore_cpu_starting(cpu);
		break;
	case CPU_UP_CANCELED:
	case CPU_DYING:
		uncore_cpu_dying(cpu);
		break;
	case CPU_ONLINE:
	case CPU_DEAD:
		uncore_kfree_boxes();
		break;
	default:
		break;
	}

	/* select the cpu that collects uncore events */
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_DOWN_FAILED:
	case CPU_STARTING:
		uncore_event_init_cpu(cpu);
		break;
	case CPU_DOWN_PREPARE:
		uncore_event_exit_cpu(cpu);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

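/*
 * The two switches run back to back: the first manages box allocation and
 * lifetime for the hotplugged CPU, the second (re)elects the CPU that collects
 * uncore events for the package, so a CPU coming online can take over
 * collection if it is the first one in its package.
 */
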
static struct notifier_block uncore_cpu_nb = {
	.notifier_call	= uncore_cpu_notifier,
	/*
	 * to migrate uncore events, our notifier should be executed
	 * before perf core's notifier.
	 */
	.priority	= CPU_PRI_PERF + 1,
};

static void __init uncore_cpu_setup(void *dummy)
{
	uncore_cpu_starting(smp_processor_id());
}

static int __init uncore_cpu_init(void)
{
	int ret;

	switch (boot_cpu_data.x86_model) {
	case 26: /* Nehalem */
	case 30:
	case 37: /* Westmere */
	case 44:
		nhm_uncore_cpu_init();
		break;
	case 42: /* Sandy Bridge */
	case 58: /* Ivy Bridge */
	case 60: /* Haswell */
	case 69: /* Haswell */
	case 70: /* Haswell */
	case 61: /* Broadwell */
	case 71: /* Broadwell */
		snb_uncore_cpu_init();
		break;
	case 45: /* Sandy Bridge-EP */
		snbep_uncore_cpu_init();
		break;
	case 46: /* Nehalem-EX */
	case 47: /* Westmere-EX aka. Xeon E7 */
		nhmex_uncore_cpu_init();
		break;
	case 62: /* Ivy Bridge-EP */
		ivbep_uncore_cpu_init();
		break;
	case 63: /* Haswell-EP */
		hswep_uncore_cpu_init();
		break;
	case 79: /* BDX-EP */
	case 86: /* BDX-DE */
		bdx_uncore_cpu_init();
		break;
	case 87: /* Knights Landing */
		knl_uncore_cpu_init();
		break;
	default:
		return 0;
	}

	ret = uncore_types_init(uncore_msr_uncores);
	if (ret)
		return ret;

	return 0;
}

static int __init uncore_pmus_register(void)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_type *type;
	int i, j;

	for (i = 0; uncore_msr_uncores[i]; i++) {
		type = uncore_msr_uncores[i];
		for (j = 0; j < type->num_boxes; j++) {
			pmu = &type->pmus[j];
			uncore_pmu_register(pmu);
		}
	}

	return 0;
}

static void __init uncore_cpumask_init(void)
{
	int cpu;

	/*
	 * only invoke once from msr or pci init code
	 */
	if (!cpumask_empty(&uncore_cpu_mask))
		return;

	cpu_notifier_register_begin();

	for_each_online_cpu(cpu) {
		int i, phys_id = topology_physical_package_id(cpu);

		for_each_cpu(i, &uncore_cpu_mask) {
			if (phys_id == topology_physical_package_id(i)) {
				phys_id = -1;
				break;
			}
		}
		if (phys_id < 0)
			continue;

		uncore_cpu_prepare(cpu, phys_id);
		uncore_event_init_cpu(cpu);
	}
	on_each_cpu(uncore_cpu_setup, NULL, 1);

	__register_cpu_notifier(&uncore_cpu_nb);

	cpu_notifier_register_done();
}

static int __init intel_uncore_init(void)
{
	int ret;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return -ENODEV;

	if (cpu_has_hypervisor)
		return -ENODEV;

	ret = uncore_pci_init();
	if (ret)
		return ret;
	ret = uncore_cpu_init();
	if (ret) {
		uncore_pci_exit();
		return ret;
	}
	uncore_cpumask_init();

	uncore_pmus_register();
	return 0;
}
device_initcall(intel_uncore_init);