drivers/gpu/drm/i915/gt/intel_rc6.c
drm/i915/gen8+: Add RC6 CTX corruption WA
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/pm_runtime.h>

#include "i915_drv.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_sideband.h"

/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using as little as 0V while in this stage.
 * This stage is entered automatically when the GPU is idle and RC6 support
 * is enabled, and the GPU wakes up automatically as soon as a new workload
 * arises.
 *
 * There are different RC6 modes available on Intel GPUs, which differ from
 * each other in the latency required to enter and leave RC6 and in the
 * voltage consumed by the GPU in the different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
 * RC6pp is the deepest RC6. Their support by hardware varies according to
 * the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
 * the one which brings the most power savings; deeper states save more
 * power, but require a higher latency to switch to and wake up from.
 */

static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6)
{
	return container_of(rc6, struct intel_gt, rc6);
}

static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc)
{
	return rc6_to_gt(rc)->uncore;
}

static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
{
	return rc6_to_gt(rc)->i915;
}

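/*
 * Note: set() uses the raw, forcewake-less register accessor, so callers
 * are expected to already hold the necessary forcewake; the *_rc6_enable()
 * paths below run with FORCEWAKE_ALL held by intel_rc6_enable().
 */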
static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
{
	intel_uncore_write_fw(uncore, reg, val);
}

static void gen11_rc6_enable(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* 2b: Program RC6 thresholds. */
	set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
	set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);

	set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
	set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
	for_each_engine(engine, rc6_to_gt(rc6), id)
		set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

	set(uncore, GUC_MAX_IDLE_COUNT, 0xA);

	set(uncore, GEN6_RC_SLEEP, 0);

	set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */

	/*
	 * 2c: Program Coarse Power Gating Policies.
	 *
	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
	 * use instead is a more conservative estimate for the maximum time
	 * it takes us to service a CS interrupt and submit a new ELSP - that
	 * is the time which the GPU is idle waiting for the CPU to select the
	 * next request to execute. If the idle hysteresis is less than that
	 * interrupt service latency, the hardware will automatically gate
	 * the power well and we will then incur the wake up cost on top of
	 * the service latency. A similar guide from plane_state is that we
	 * do not want the enable hysteresis to be less than the wakeup latency.
	 *
	 * igt/gem_exec_nop/sequential provides a rough estimate for the
	 * service latency, and puts it around 10us for Broadwell (and other
	 * big core) and around 40us for Broxton (and other low power cores).
	 * [Note that for legacy ringbuffer submission, this is less than 1us!]
	 * However, the wakeup latency on Broxton is closer to 100us. To be
	 * conservative, we have to factor in a context switch on top (due
	 * to ksoftirqd).
	 */
	set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
	set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);

	/* 3a: Enable RC6 */
	set(uncore, GEN6_RC_CONTROL,
	    GEN6_RC_CTL_HW_ENABLE |
	    GEN6_RC_CTL_RC6_ENABLE |
	    GEN6_RC_CTL_EI_MODE(1));

	set(uncore, GEN9_PG_ENABLE,
	    GEN9_RENDER_PG_ENABLE |
	    GEN9_MEDIA_PG_ENABLE |
	    GEN11_MEDIA_SAMPLER_PG_ENABLE);
}

static void gen9_rc6_enable(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 rc6_mode;

	/* 2b: Program RC6 thresholds. */
	if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) {
		set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
		set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
	} else if (IS_SKYLAKE(rc6_to_i915(rc6))) {
		/*
		 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
		 * when CPG is enabled
		 */
		set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
	} else {
		set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
	}

	set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
	set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
	for_each_engine(engine, rc6_to_gt(rc6), id)
		set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

	set(uncore, GUC_MAX_IDLE_COUNT, 0xA);

	set(uncore, GEN6_RC_SLEEP, 0);

	/*
	 * 2c: Program Coarse Power Gating Policies.
	 *
	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
	 * use instead is a more conservative estimate for the maximum time
	 * it takes us to service a CS interrupt and submit a new ELSP - that
	 * is the time which the GPU is idle waiting for the CPU to select the
	 * next request to execute. If the idle hysteresis is less than that
	 * interrupt service latency, the hardware will automatically gate
	 * the power well and we will then incur the wake up cost on top of
	 * the service latency. A similar guide from plane_state is that we
	 * do not want the enable hysteresis to be less than the wakeup latency.
	 *
	 * igt/gem_exec_nop/sequential provides a rough estimate for the
	 * service latency, and puts it around 10us for Broadwell (and other
	 * big core) and around 40us for Broxton (and other low power cores).
	 * [Note that for legacy ringbuffer submission, this is less than 1us!]
	 * However, the wakeup latency on Broxton is closer to 100us. To be
	 * conservative, we have to factor in a context switch on top (due
	 * to ksoftirqd).
	 */
	set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
	set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);

	/* 3a: Enable RC6 */
	set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */

	/* WaRsUseTimeoutMode:cnl (pre-prod) */
	if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0))
		rc6_mode = GEN7_RC_CTL_TO_MODE;
	else
		rc6_mode = GEN6_RC_CTL_EI_MODE(1);

	set(uncore, GEN6_RC_CONTROL,
	    GEN6_RC_CTL_HW_ENABLE |
	    GEN6_RC_CTL_RC6_ENABLE |
	    rc6_mode);

	set(uncore, GEN9_PG_ENABLE,
	    GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
}

static void gen8_rc6_enable(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* 2b: Program RC6 thresholds. */
	set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
	set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
	set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
	for_each_engine(engine, rc6_to_gt(rc6), id)
		set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
	set(uncore, GEN6_RC_SLEEP, 0);
	set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */

	/* 3: Enable RC6 */
	set(uncore, GEN6_RC_CONTROL,
	    GEN6_RC_CTL_HW_ENABLE |
	    GEN7_RC_CTL_TO_MODE |
	    GEN6_RC_CTL_RC6_ENABLE);
}

static void gen6_rc6_enable(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 rc6vids, rc6_mask;
	int ret;

	set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
	set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
	set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
	set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
	set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);

	for_each_engine(engine, rc6_to_gt(rc6), id)
		set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

	set(uncore, GEN6_RC_SLEEP, 0);
	set(uncore, GEN6_RC1e_THRESHOLD, 1000);
	if (IS_IVYBRIDGE(i915))
		set(uncore, GEN6_RC6_THRESHOLD, 125000);
	else
		set(uncore, GEN6_RC6_THRESHOLD, 50000);
	set(uncore, GEN6_RC6p_THRESHOLD, 150000);
	set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */

	/* We don't use those on Haswell */
	rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
	if (HAS_RC6p(i915))
		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
	if (HAS_RC6pp(i915))
		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
	set(uncore, GEN6_RC_CONTROL,
	    rc6_mask |
	    GEN6_RC_CTL_EI_MODE(1) |
	    GEN6_RC_CTL_HW_ENABLE);

	rc6vids = 0;
	ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
				     &rc6vids, NULL);
	if (IS_GEN(i915, 6) && ret) {
		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
	} else if (IS_GEN(i915, 6) &&
		   (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
		DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
				 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
		rc6vids &= 0xffff00;
		rc6vids |= GEN6_ENCODE_RC6_VID(450);
		ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
		if (ret)
			DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
	}
}

/* Check that the pcbr address is not empty. */
static int chv_rc6_init(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	resource_size_t pctx_paddr, paddr;
	resource_size_t pctx_size = 32 * SZ_1K;
	u32 pcbr;

	pcbr = intel_uncore_read(uncore, VLV_PCBR);
	if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
		DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
		paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size;
		GEM_BUG_ON(paddr > U32_MAX);

		pctx_paddr = (paddr & ~4095);
		intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
	}

	return 0;
}

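/*
 * On Valleyview, either reuse the power context the BIOS preallocated via
 * PCBR, or carve a fresh one out of stolen memory and point PCBR at it.
 */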
static int vlv_rc6_init(struct intel_rc6 *rc6)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct drm_i915_gem_object *pctx;
	resource_size_t pctx_paddr;
	resource_size_t pctx_size = 24 * SZ_1K;
	u32 pcbr;

	pcbr = intel_uncore_read(uncore, VLV_PCBR);
	if (pcbr) {
		/* BIOS set it up already, grab the pre-alloc'd space */
		resource_size_t pcbr_offset;

		pcbr_offset = (pcbr & ~4095) - i915->dsm.start;
		pctx = i915_gem_object_create_stolen_for_preallocated(i915,
								      pcbr_offset,
								      I915_GTT_OFFSET_NONE,
								      pctx_size);
		if (IS_ERR(pctx))
			return PTR_ERR(pctx);

		goto out;
	}

	DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");

	/*
	 * From the Gunit register HAS:
	 * The Gfx driver is expected to program this register and ensure
	 * proper allocation within Gfx stolen memory. For example, this
	 * register should be programmed such that the PCBR range does not
	 * overlap with other ranges, such as the frame buffer, protected
	 * memory, or any other relevant ranges.
	 */
	pctx = i915_gem_object_create_stolen(i915, pctx_size);
	if (IS_ERR(pctx)) {
		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
		return PTR_ERR(pctx);
	}

	GEM_BUG_ON(range_overflows_t(u64,
				     i915->dsm.start,
				     pctx->stolen->start,
				     U32_MAX));
	pctx_paddr = i915->dsm.start + pctx->stolen->start;
	intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);

out:
	rc6->pctx = pctx;
	return 0;
}

static void chv_rc6_enable(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* 2a: Program RC6 thresholds. */
	set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
	set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
	set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */

	for_each_engine(engine, rc6_to_gt(rc6), id)
		set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
	set(uncore, GEN6_RC_SLEEP, 0);

	/* TO threshold set to 500 us (0x186 * 1.28 us) */
	set(uncore, GEN6_RC6_THRESHOLD, 0x186);

	/* Allows RC6 residency counter to work */
	set(uncore, VLV_COUNTER_CONTROL,
	    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
			       VLV_MEDIA_RC6_COUNT_EN |
			       VLV_RENDER_RC6_COUNT_EN));

	/* 3: Enable RC6 */
	set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE);
}

static void vlv_rc6_enable(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
	set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
	set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);

	for_each_engine(engine, rc6_to_gt(rc6), id)
		set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

	set(uncore, GEN6_RC6_THRESHOLD, 0x557);

	/* Allows RC6 residency counter to work */
	set(uncore, VLV_COUNTER_CONTROL,
	    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
			       VLV_MEDIA_RC0_COUNT_EN |
			       VLV_RENDER_RC0_COUNT_EN |
			       VLV_MEDIA_RC6_COUNT_EN |
			       VLV_RENDER_RC6_COUNT_EN));

	set(uncore, GEN6_RC_CONTROL,
	    GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
}

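/*
 * On Broxton the BIOS owns part of the RC6 setup (RC6 context location,
 * engine idle wait times, pushbus and GPM programming). Sanity check all of
 * it before trusting RC6; if anything looks wrong, report RC6 as unusable
 * so it stays disabled.
 */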
static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	u32 rc6_ctx_base, rc_ctl, rc_sw_target;
	bool enable_rc6 = true;

	rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL);
	rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE);
	rc_sw_target &= RC_SW_TARGET_STATE_MASK;
	rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT;
	DRM_DEBUG_DRIVER("BIOS enabled RC states: "
			 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
			 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
			 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
			 rc_sw_target);

	if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
		DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
		enable_rc6 = false;
	}

	/*
	 * The exact context size is not known for BXT, so assume a page size
	 * for this check.
	 */
	rc6_ctx_base =
		intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
	if (!(rc6_ctx_base >= i915->dsm_reserved.start &&
	      rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) {
		DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
		enable_rc6 = false;
	}

	if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1 &&
	      (intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 &&
	      (intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 &&
	      (intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) {
		DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
		enable_rc6 = false;
	}

	if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) ||
	    !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) ||
	    !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) {
		DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
		enable_rc6 = false;
	}

	if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) {
		DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
		enable_rc6 = false;
	}

	if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) {
		DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
		enable_rc6 = false;
	}

	return enable_rc6;
}

static bool rc6_supported(struct intel_rc6 *rc6)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);

	if (!HAS_RC6(i915))
		return false;

	if (intel_vgpu_active(i915))
		return false;

	if (is_mock_gt(rc6_to_gt(rc6)))
		return false;

	if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) {
		dev_notice(i915->drm.dev,
			   "RC6 and powersaving disabled by BIOS\n");
		return false;
	}

	return true;
}

static void rpm_get(struct intel_rc6 *rc6)
{
	GEM_BUG_ON(rc6->wakeref);
	pm_runtime_get_sync(&rc6_to_i915(rc6)->drm.pdev->dev);
	rc6->wakeref = true;
}

static void rpm_put(struct intel_rc6 *rc6)
{
	GEM_BUG_ON(!rc6->wakeref);
	pm_runtime_put(&rc6_to_i915(rc6)->drm.pdev->dev);
	rc6->wakeref = false;
}

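/*
 * The RC6 context is considered corrupted when GEN8_RC6_CTX_INFO reads back
 * as zero; see the CTX WA handling below.
 */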
static bool intel_rc6_ctx_corrupted(struct intel_rc6 *rc6)
{
	return !intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO);
}

static void intel_rc6_ctx_wa_init(struct intel_rc6 *rc6)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);

	if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
		return;

	if (intel_rc6_ctx_corrupted(rc6)) {
		DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
		rc6->ctx_corrupted = true;
	}
}

/**
 * intel_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
 * @rc6: rc6 state
 *
 * Perform any steps needed to re-init the RC6 CTX WA after system resume.
 */
void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6)
{
	if (rc6->ctx_corrupted && !intel_rc6_ctx_corrupted(rc6)) {
		DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
		rc6->ctx_corrupted = false;
	}
}

/**
 * intel_rc6_ctx_wa_check - check for a new RC6 CTX corruption
 * @rc6: rc6 state
 *
 * Check if an RC6 CTX corruption has happened since the last check and if so
 * disable RC6 and runtime power management.
 */
void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);

	if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
		return;

	if (rc6->ctx_corrupted)
		return;

	if (!intel_rc6_ctx_corrupted(rc6))
		return;

	DRM_NOTE("RC6 context corruption, disabling runtime power management\n");

	intel_rc6_disable(rc6);
	rc6->ctx_corrupted = true;
}

static void __intel_rc6_disable(struct intel_rc6 *rc6)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	struct intel_uncore *uncore = rc6_to_uncore(rc6);

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
	if (INTEL_GEN(i915) >= 9)
		set(uncore, GEN9_PG_ENABLE, 0);
	set(uncore, GEN6_RC_CONTROL, 0);
	set(uncore, GEN6_RC_STATE, 0);
	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
}

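/**
 * intel_rc6_init - early RC6 setup
 * @rc6: rc6 state
 *
 * Takes a runtime-pm wakeref to keep runtime-pm disabled until RC6 is ready
 * (it is dropped again in intel_rc6_enable() or intel_rc6_fini()), checks
 * whether the platform and BIOS allow RC6 at all, applies the RC6 CTX WA and
 * sets up the VLV/CHV power context before sanitizing RC6 to off.
 */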
void intel_rc6_init(struct intel_rc6 *rc6)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	int err;

	/* Disable runtime-pm until we can save the GPU state with rc6 pctx */
	rpm_get(rc6);

	if (!rc6_supported(rc6))
		return;

	intel_rc6_ctx_wa_init(rc6);

	if (IS_CHERRYVIEW(i915))
		err = chv_rc6_init(rc6);
	else if (IS_VALLEYVIEW(i915))
		err = vlv_rc6_init(rc6);
	else
		err = 0;

	/* Sanitize rc6, ensure it is disabled before we are ready. */
	__intel_rc6_disable(rc6);

	rc6->supported = err == 0;
}

void intel_rc6_sanitize(struct intel_rc6 *rc6)
{
	if (rc6->enabled) { /* unbalanced suspend/resume */
		rpm_get(rc6);
		rc6->enabled = false;
	}

	if (rc6->supported)
		__intel_rc6_disable(rc6);
}

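/**
 * intel_rc6_enable - program and turn on RC6 for the current platform
 * @rc6: rc6 state
 *
 * Picks the platform specific enable path, then releases the runtime-pm
 * wakeref taken in intel_rc6_init() so that runtime power management can
 * kick in. Returns early, leaving runtime-pm disabled, if RC6 is
 * unsupported or the RC6 context is corrupted.
 */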
void intel_rc6_enable(struct intel_rc6 *rc6)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	struct intel_uncore *uncore = rc6_to_uncore(rc6);

	if (!rc6->supported)
		return;

	GEM_BUG_ON(rc6->enabled);

	if (rc6->ctx_corrupted)
		return;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	if (IS_CHERRYVIEW(i915))
		chv_rc6_enable(rc6);
	else if (IS_VALLEYVIEW(i915))
		vlv_rc6_enable(rc6);
	else if (INTEL_GEN(i915) >= 11)
		gen11_rc6_enable(rc6);
	else if (INTEL_GEN(i915) >= 9)
		gen9_rc6_enable(rc6);
	else if (IS_BROADWELL(i915))
		gen8_rc6_enable(rc6);
	else if (INTEL_GEN(i915) >= 6)
		gen6_rc6_enable(rc6);

	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);

	/* rc6 is ready, runtime-pm is go! */
	rpm_put(rc6);
	rc6->enabled = true;
}

void intel_rc6_disable(struct intel_rc6 *rc6)
{
	if (!rc6->enabled)
		return;

	rpm_get(rc6);
	rc6->enabled = false;

	__intel_rc6_disable(rc6);
}

void intel_rc6_fini(struct intel_rc6 *rc6)
{
	struct drm_i915_gem_object *pctx;

	intel_rc6_disable(rc6);

	pctx = fetch_and_zero(&rc6->pctx);
	if (pctx)
		i915_gem_object_put(pctx);

	if (rc6->wakeref)
		rpm_put(rc6);
}

static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg)
{
	u32 lower, upper, tmp;
	int loop = 2;

	/*
	 * The registers accessed do not need forcewake. We borrow
	 * uncore lock to prevent concurrent access to the range register.
	 */
	lockdep_assert_held(&uncore->lock);

	/*
	 * vlv and chv residency counters are 40 bits in width.
	 * With a control bit, we can choose between upper or lower
	 * 32bit window into this counter.
	 *
	 * Although we always use the counter in high-range mode elsewhere,
	 * userspace may attempt to read the value before rc6 is initialised,
	 * before we have set the default VLV_COUNTER_CONTROL value. So always
	 * set the high bit to be safe.
	 */
	set(uncore, VLV_COUNTER_CONTROL,
	    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
	upper = intel_uncore_read_fw(uncore, reg);
	do {
		tmp = upper;

		set(uncore, VLV_COUNTER_CONTROL,
		    _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
		lower = intel_uncore_read_fw(uncore, reg);

		set(uncore, VLV_COUNTER_CONTROL,
		    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
		upper = intel_uncore_read_fw(uncore, reg);
	} while (upper != tmp && --loop);

	/*
	 * Everywhere else we always use VLV_COUNTER_CONTROL with the
	 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
	 * now.
	 */

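	/*
	 * The high 32bit window exposes bits [39:8] of the 40-bit counter,
	 * hence the << 8 when merging it with the low window below; the
	 * retry loop above re-reads until the high window is stable (or we
	 * give up after two tries).
	 */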
	return lower | (u64)upper << 8;
}

u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	u64 time_hw, prev_hw, overflow_hw;
	unsigned int fw_domains;
	unsigned long flags;
	unsigned int i;
	u32 mul, div;

	if (!rc6->supported)
		return 0;

	/*
	 * Store previous hw counter values for counter wrap-around handling.
	 *
	 * There are only four interesting registers and they live next to each
	 * other so we can use the relative address, compared to the smallest
	 * one as the index into driver storage.
	 */
	i = (i915_mmio_reg_offset(reg) -
	     i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
	if (WARN_ON_ONCE(i >= ARRAY_SIZE(rc6->cur_residency)))
		return 0;

	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);

	spin_lock_irqsave(&uncore->lock, flags);
	intel_uncore_forcewake_get__locked(uncore, fw_domains);

	/* On VLV and CHV, residency time is in CZ units rather than 1.28us */
	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
		mul = 1000000;
		div = i915->czclk_freq;
		overflow_hw = BIT_ULL(40);
		time_hw = vlv_residency_raw(uncore, reg);
	} else {
		/* 833.33ns units on Gen9LP, 1.28us elsewhere. */
		if (IS_GEN9_LP(i915)) {
			mul = 10000;
			div = 12;
		} else {
			mul = 1280;
			div = 1;
		}

		overflow_hw = BIT_ULL(32);
		time_hw = intel_uncore_read_fw(uncore, reg);
	}

	/*
	 * Counter wrap handling.
	 *
	 * Note that this relies on a sufficient frequency of queries,
	 * otherwise counters can still wrap.
	 */
	prev_hw = rc6->prev_hw_residency[i];
	rc6->prev_hw_residency[i] = time_hw;

	/* RC6 delta from last sample. */
	if (time_hw >= prev_hw)
		time_hw -= prev_hw;
	else
		time_hw += overflow_hw - prev_hw;

	/* Add delta to RC6 extended raw driver copy. */
	time_hw += rc6->cur_residency[i];
	rc6->cur_residency[i] = time_hw;

	intel_uncore_forcewake_put__locked(uncore, fw_domains);
	spin_unlock_irqrestore(&uncore->lock, flags);

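	/*
	 * Scale raw hw ticks to nanoseconds: mul/div gives the tick period
	 * in ns, e.g. 10000/12 ns (~833.33ns) on Gen9LP and 1280ns elsewhere,
	 * while VLV/CHV derive it from the CZ clock frequency above.
	 */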
	return mul_u64_u32_div(time_hw, mul, div);
}

u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg)
{
	return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000);
}