/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/perf_event.h>
#include <linux/pm_runtime.h>

#include "i915_drv.h"
#include "i915_pmu.h"
#include "intel_ringbuffer.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

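/*
 * Back-of-the-envelope check: with FREQUENCY at 200Hz, NSEC_PER_SEC /
 * FREQUENCY is 5,000,000ns, so PERIOD works out to a 5ms sampling tick;
 * the max_t() merely enforces a 10usec lower bound on it.
 */
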
#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;

static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

static unsigned int config_enabled_bit(u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}

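/*
 * For example, an engine config maps to its sample bit (such as
 * I915_SAMPLE_BUSY) in the low ENGINE_SAMPLE_BITS bits, while a non-engine
 * config __I915_PMU_OTHER(n) maps to bit ENGINE_SAMPLE_BITS + n, so the two
 * ranges of the pmu.enable bitmask never collide.
 */
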
static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}

static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = i915->pmu.enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle, per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, when software busyness tracking is available we do not
	 * need the timer for the I915_SAMPLE_BUSY counter.
	 *
	 * Use RCS as proxy for all engines.
	 */
	else if (intel_engine_supports_stats(i915->engine[RCS]))
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
	spin_unlock_irq(&i915->pmu.lock);
}

static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
{
	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
		i915->pmu.timer_enabled = true;
		hrtimer_start_range_ns(&i915->pmu.timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(i915);
	spin_unlock_irq(&i915->pmu.lock);
}

static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
{
	if (!fw)
		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);

	return true;
}

static void
update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
{
	sample->cur += mul_u32_u32(val, unit);
}

static void engines_sample(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	bool fw = false;

	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!dev_priv->gt.awake)
		return;

	if (!intel_runtime_pm_get_if_in_use(dev_priv))
		return;

	for_each_engine(engine, dev_priv, id) {
		u32 current_seqno = intel_engine_get_seqno(engine);
		u32 last_seqno = intel_engine_last_submit(engine);
		u32 val;

		val = !i915_seqno_passed(current_seqno, last_seqno);

		update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
			      PERIOD, val);

		if (val && (engine->pmu.enable &
		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
			fw = grab_forcewake(dev_priv, fw);

			val = I915_READ_FW(RING_CTL(engine->mmio_base));
		} else {
			val = 0;
		}

		update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
			      PERIOD, !!(val & RING_WAIT));

		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
	}

	if (fw)
		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	intel_runtime_pm_put(dev_priv);
}

static void frequency_sample(struct drm_i915_private *dev_priv)
{
	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		val = dev_priv->gt_pm.rps.cur_freq;
		if (dev_priv->gt.awake &&
		    intel_runtime_pm_get_if_in_use(dev_priv)) {
			val = intel_get_cagf(dev_priv,
					     I915_READ_NOTRACE(GEN6_RPSTAT1));
			intel_runtime_pm_put(dev_priv);
		}

		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
			      1, intel_gpu_freq(dev_priv, val));
	}

	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
			      intel_gpu_freq(dev_priv,
					     dev_priv->gt_pm.rps.cur_freq));
	}
}

static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);

	if (!READ_ONCE(i915->pmu.timer_enabled))
		return HRTIMER_NORESTART;

	engines_sample(i915);
	frequency_sample(i915);

	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
	return HRTIMER_RESTART;
}

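/*
 * Each timer tick above runs engines_sample() and frequency_sample() and
 * re-arms the timer for another PERIOD; clearing pmu.timer_enabled (read
 * with READ_ONCE) is what lets the callback return HRTIMER_NORESTART and
 * stop the sampling.
 */
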
static u64 count_interrupts(struct drm_i915_private *i915)
{
	/* open-coded kstat_irqs() */
	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
	u64 sum = 0;
	int cpu;

	if (!desc || !desc->kstat_irqs)
		return 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

	return sum;
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	WARN_ON(event->parent);
}

static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);

	if (!intel_engine_lookup_user(i915, engine_event_class(event),
				      engine_event_instance(event)))
		return -ENODEV;

	switch (engine_event_sample(event)) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event)) {
		ret = engine_event_init(event);
	} else {
		ret = 0;
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
				/* Requires a mutex for sampling! */
				ret = -ENODEV;
			/* Fall-through. */
		case I915_PMU_REQUESTED_FREQUENCY:
			if (INTEL_GEN(i915) < 6)
				ret = -ENODEV;
			break;
		case I915_PMU_INTERRUPTS:
			break;
		case I915_PMU_RC6_RESIDENCY:
			if (!HAS_RC6(i915))
				ret = -ENODEV;
			break;
		default:
			ret = -ENOENT;
			break;
		}
	}
	if (ret)
		return ret;

	if (!event->parent)
		event->destroy = i915_pmu_event_destroy;

	return 0;
}

static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (WARN_ON_ONCE(!engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   engine->pmu.busy_stats) {
			val = ktime_to_ns(intel_engine_get_busy_time(engine));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
				   FREQUENCY);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
				   FREQUENCY);
			break;
		case I915_PMU_INTERRUPTS:
			val = count_interrupts(i915);
			break;
		case I915_PMU_RC6_RESIDENCY:
			intel_runtime_pm_get(i915);
			val = intel_rc6_residency_ns(i915,
						     IS_VALLEYVIEW(i915) ?
						     VLV_GT_RENDER_RC6 :
						     GEN6_GT_GFX_RC6);
			if (HAS_RC6p(i915))
				val += intel_rc6_residency_ns(i915,
							      GEN6_GT_GFX_RC6p);
			if (HAS_RC6pp(i915))
				val += intel_rc6_residency_ns(i915,
							      GEN6_GT_GFX_RC6pp);
			intel_runtime_pm_put(i915);
			break;
		}
	}

	return val;
}

static void i915_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}

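/*
 * Note on the read path above: the counters are free running, so only the
 * delta between the freshly sampled value and the previously published
 * prev_count is added to the perf event count. The cmpxchg retry loop keeps
 * prev_count and the reported delta consistent if prev_count is updated
 * concurrently.
 */
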
static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
{
	return intel_engine_supports_stats(engine) &&
	       (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
}

static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
	i915->pmu.enable |= BIT_ULL(bit);
	i915->pmu.enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(i915);

	/*
	 * For per-engine events the bitmask and reference counting
	 * are stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		engine->pmu.enable |= BIT(sample);

		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
		if (engine->pmu.enable_count[sample]++ == 0) {
			/*
			 * Enable engine busy stats tracking if needed or
			 * alternatively cancel the scheduled disable.
			 *
			 * If the delayed disable was pending, cancel it and
			 * do not enable the stats again since they are
			 * already enabled.
			 */
			if (engine_needs_busy_stats(engine) &&
			    !engine->pmu.busy_stats) {
				engine->pmu.busy_stats = true;
				if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
					intel_enable_engine_stats(engine);
			}
		}
	}

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners, even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}

static void __disable_busy_stats(struct work_struct *work)
{
	struct intel_engine_cs *engine =
	       container_of(work, typeof(*engine), pmu.disable_busy_stats.work);

	intel_disable_engine_stats(engine);
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0) {
			engine->pmu.enable &= ~BIT(sample);
			if (!engine_needs_busy_stats(engine) &&
			    engine->pmu.busy_stats) {
				engine->pmu.busy_stats = false;
				/*
				 * We request a delayed disable so that the
				 * rapid on/off cycles on events, which can
				 * happen when tools like perf stat start, are
				 * handled in a nicer way.
				 *
				 * In addition, this also helps with busy stats
				 * accuracy with background CPU offline/online
				 * migration events.
				 */
				queue_delayed_work(system_wq,
						   &engine->pmu.disable_busy_stats,
						   round_jiffies_up_relative(HZ));
			}
		}
	}

	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--i915->pmu.enable_count[bit] == 0) {
		i915->pmu.enable &= ~BIT_ULL(bit);
		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

#define I915_EVENT_ATTR(_name, _config) \
	(&((struct i915_ext_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_event_show, NULL), \
		  .val = _config, } \
	})[0].attr.attr)

#define I915_EVENT_STR(_name, _str) \
	(&((struct perf_pmu_events_attr[]) { \
		{ .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
		  .id = 0, \
		  .event_str = _str, } \
	})[0].attr.attr)

#define I915_EVENT(_name, _config, _unit) \
	I915_EVENT_ATTR(_name, _config), \
	I915_EVENT_STR(_name.unit, _unit)

#define I915_ENGINE_EVENT(_name, _class, _instance, _sample) \
	I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)), \
	I915_EVENT_STR(_name.unit, "ns")

#define I915_ENGINE_EVENTS(_name, _class, _instance) \
	I915_ENGINE_EVENT(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \
	I915_ENGINE_EVENT(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \
	I915_ENGINE_EVENT(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT)

static struct attribute *i915_pmu_events_attrs[] = {
	I915_ENGINE_EVENTS(rcs, I915_ENGINE_CLASS_RENDER, 0),
	I915_ENGINE_EVENTS(bcs, I915_ENGINE_CLASS_COPY, 0),
	I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 0),
	I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 1),
	I915_ENGINE_EVENTS(vecs, I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),

	I915_EVENT(actual-frequency, I915_PMU_ACTUAL_FREQUENCY, "MHz"),
	I915_EVENT(requested-frequency, I915_PMU_REQUESTED_FREQUENCY, "MHz"),

	I915_EVENT_ATTR(interrupts, I915_PMU_INTERRUPTS),

	I915_EVENT(rc6-residency, I915_PMU_RC6_RESIDENCY, "ns"),

	NULL,
};

static const struct attribute_group i915_pmu_events_attr_group = {
	.name = "events",
	.attrs = i915_pmu_events_attrs,
};

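/*
 * Illustrative usage sketch: with the PMU registered under the name "i915"
 * below, the events defined above show up in sysfs and can be counted
 * system-wide with perf, e.g. something along the lines of:
 *
 *   perf stat -e i915/rcs0-busy/,i915/actual-frequency/ -a sleep 1
 *
 * The exact event names follow i915_pmu_events_attrs[].
 */
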
static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
	&i915_pmu_format_attr_group,
	&i915_pmu_events_attr_group,
	&i915_pmu_cpumask_attr_group,
	NULL
};

static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (!cpumask_weight(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
	unsigned int target;

	GEM_BUG_ON(!pmu->base.event_init);

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			perf_pmu_migrate_context(&pmu->base, cpu, target);
		}
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
{
	enum cpuhp_state slot;
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		return ret;

	slot = ret;
	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
	if (ret) {
		cpuhp_remove_multi_state(slot);
		return ret;
	}

	cpuhp_slot = slot;
	return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
{
	WARN_ON(cpuhp_slot == CPUHP_INVALID);
	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
	cpuhp_remove_multi_state(cpuhp_slot);
}

void i915_pmu_register(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int ret;

	if (INTEL_GEN(i915) <= 2) {
		DRM_INFO("PMU not supported for this GPU.");
		return;
	}

	i915->pmu.base.attr_groups = i915_pmu_attr_groups;
	i915->pmu.base.task_ctx_nr = perf_invalid_context;
	i915->pmu.base.event_init = i915_pmu_event_init;
	i915->pmu.base.add = i915_pmu_event_add;
	i915->pmu.base.del = i915_pmu_event_del;
	i915->pmu.base.start = i915_pmu_event_start;
	i915->pmu.base.stop = i915_pmu_event_stop;
	i915->pmu.base.read = i915_pmu_event_read;
	i915->pmu.base.event_idx = i915_pmu_event_event_idx;

	spin_lock_init(&i915->pmu.lock);
	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	i915->pmu.timer.function = i915_sample;

	for_each_engine(engine, i915, id)
		INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
				  __disable_busy_stats);

	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
	if (ret)
		goto err;

	ret = i915_pmu_register_cpuhp_state(i915);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&i915->pmu.base);
err:
	i915->pmu.base.event_init = NULL;
	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	if (!i915->pmu.base.event_init)
		return;

	WARN_ON(i915->pmu.enable);

	hrtimer_cancel(&i915->pmu.timer);

	for_each_engine(engine, i915, id) {
		GEM_BUG_ON(engine->pmu.busy_stats);
		flush_delayed_work(&engine->pmu.disable_busy_stats);
	}

	i915_pmu_unregister_cpuhp_state(i915);

	perf_pmu_unregister(&i915->pmu.base);
	i915->pmu.base.event_init = NULL;
}