git.proxmox.com Git mirror_ubuntu-bionic-kernel.git, Ubuntu-4.15.0-96.97: drivers/gpu/drm/i915/intel_pm.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eugeni Dodonov <eugeni.dodonov@intel.com>
25 *
26 */
27
28 #include <linux/cpufreq.h>
29 #include <drm/drm_plane_helper.h>
30 #include "i915_drv.h"
31 #include "intel_drv.h"
32 #include "../../../platform/x86/intel_ips.h"
33 #include <linux/module.h>
34 #include <drm/drm_atomic_helper.h>
35
36 /**
37 * DOC: RC6
38 *
39 * RC6 is a special power stage which allows the GPU to enter a very
40 * low-voltage mode when idle, using down to 0V while at this stage. This
41 * stage is entered automatically when the GPU is idle and RC6 support is
42 * enabled, and the GPU wakes up automatically as soon as a new workload arises.
43 *
44 * There are different RC6 modes available on Intel GPUs, which differ from
45 * each other in the latency required to enter and leave RC6 and in the
46 * voltage consumed by the GPU in different states.
47 *
48 * The combination of the following flags defines which states the GPU is
49 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
50 * RC6pp is the deepest RC6. Their support by hardware varies according to the
51 * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
52 * which brings the most power savings; deeper states save more power, but
53 * require higher latency to switch to and wake up.
54 */
55 #define INTEL_RC6_ENABLE (1<<0)
56 #define INTEL_RC6p_ENABLE (1<<1)
57 #define INTEL_RC6pp_ENABLE (1<<2)
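/*
 * Hypothetical illustration (not a recommendation for any platform): a
 * policy allowing plain RC6 and deep RC6 but not RC6pp would be
 * (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE), i.e. bits 0 and 1 set (0x3).
 */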
58
59 static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
60 {
61 if (HAS_LLC(dev_priv)) {
62 /*
63 * WaCompressedResourceDisplayNewHashMode:skl,kbl
64 * Display WA#0390: skl,kbl
65 *
66 * Must match Sampler, Pixel Back End, and Media. See
67 * WaCompressedResourceSamplerPbeMediaNewHashMode.
68 */
69 I915_WRITE(CHICKEN_PAR1_1,
70 I915_READ(CHICKEN_PAR1_1) |
71 SKL_DE_COMPRESSED_HASH_MODE);
72 }
73
74 /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
75 I915_WRITE(CHICKEN_PAR1_1,
76 I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
77
78 I915_WRITE(GEN8_CONFIG0,
79 I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES);
80
81 /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
82 I915_WRITE(GEN8_CHICKEN_DCPR_1,
83 I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
84
85 /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
86 /* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
87 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
88 DISP_FBC_WM_DIS |
89 DISP_FBC_MEMORY_WAKE);
90
91 /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
92 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
93 ILK_DPFC_DISABLE_DUMMY0);
94
95 if (IS_SKYLAKE(dev_priv)) {
96 /* WaDisableDopClockGating */
97 I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL)
98 & ~GEN7_DOP_CLOCK_GATE_ENABLE);
99 }
100 }
101
102 static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
103 {
104 gen9_init_clock_gating(dev_priv);
105
106 /* WaDisableSDEUnitClockGating:bxt */
107 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
108 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
109
110 /*
111 * FIXME:
112 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
113 */
114 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
115 GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
116
117 /*
118 * Wa: Backlight PWM may stop in the asserted state, causing backlight
119 * to stay fully on.
120 */
121 I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
122 PWM1_GATING_DIS | PWM2_GATING_DIS);
123
124 /*
125 * Lower the display internal timeout.
126 * This is needed to avoid any hard hangs when DSI port PLL
127 * is off and an MMIO access is attempted by any privileged
128 * application, using batch buffers or any other means.
129 */
130 I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950));
131 }
132
133 static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
134 {
135 gen9_init_clock_gating(dev_priv);
136
137 /*
138 * WaDisablePWMClockGating:glk
139 * Backlight PWM may stop in the asserted state, causing backlight
140 * to stay fully on.
141 */
142 I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
143 PWM1_GATING_DIS | PWM2_GATING_DIS);
144
145 /* WaDDIIOTimeout:glk */
146 if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
147 u32 val = I915_READ(CHICKEN_MISC_2);
148 val &= ~(GLK_CL0_PWR_DOWN |
149 GLK_CL1_PWR_DOWN |
150 GLK_CL2_PWR_DOWN);
151 I915_WRITE(CHICKEN_MISC_2, val);
152 }
153
154 }
155
156 static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
157 {
158 u32 tmp;
159
160 tmp = I915_READ(CLKCFG);
161
162 switch (tmp & CLKCFG_FSB_MASK) {
163 case CLKCFG_FSB_533:
164 dev_priv->fsb_freq = 533; /* 133*4 */
165 break;
166 case CLKCFG_FSB_800:
167 dev_priv->fsb_freq = 800; /* 200*4 */
168 break;
169 case CLKCFG_FSB_667:
170 dev_priv->fsb_freq = 667; /* 167*4 */
171 break;
172 case CLKCFG_FSB_400:
173 dev_priv->fsb_freq = 400; /* 100*4 */
174 break;
175 }
176
177 switch (tmp & CLKCFG_MEM_MASK) {
178 case CLKCFG_MEM_533:
179 dev_priv->mem_freq = 533;
180 break;
181 case CLKCFG_MEM_667:
182 dev_priv->mem_freq = 667;
183 break;
184 case CLKCFG_MEM_800:
185 dev_priv->mem_freq = 800;
186 break;
187 }
188
189 /* detect pineview DDR3 setting */
190 tmp = I915_READ(CSHRDDR3CTL);
191 dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
192 }
193
194 static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
195 {
196 u16 ddrpll, csipll;
197
198 ddrpll = I915_READ16(DDRMPLL1);
199 csipll = I915_READ16(CSIPLL0);
200
201 switch (ddrpll & 0xff) {
202 case 0xc:
203 dev_priv->mem_freq = 800;
204 break;
205 case 0x10:
206 dev_priv->mem_freq = 1066;
207 break;
208 case 0x14:
209 dev_priv->mem_freq = 1333;
210 break;
211 case 0x18:
212 dev_priv->mem_freq = 1600;
213 break;
214 default:
215 DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
216 ddrpll & 0xff);
217 dev_priv->mem_freq = 0;
218 break;
219 }
220
221 dev_priv->ips.r_t = dev_priv->mem_freq;
222
223 switch (csipll & 0x3ff) {
224 case 0x00c:
225 dev_priv->fsb_freq = 3200;
226 break;
227 case 0x00e:
228 dev_priv->fsb_freq = 3733;
229 break;
230 case 0x010:
231 dev_priv->fsb_freq = 4266;
232 break;
233 case 0x012:
234 dev_priv->fsb_freq = 4800;
235 break;
236 case 0x014:
237 dev_priv->fsb_freq = 5333;
238 break;
239 case 0x016:
240 dev_priv->fsb_freq = 5866;
241 break;
242 case 0x018:
243 dev_priv->fsb_freq = 6400;
244 break;
245 default:
246 DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
247 csipll & 0x3ff);
248 dev_priv->fsb_freq = 0;
249 break;
250 }
251
252 if (dev_priv->fsb_freq == 3200) {
253 dev_priv->ips.c_m = 0;
254 } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
255 dev_priv->ips.c_m = 1;
256 } else {
257 dev_priv->ips.c_m = 2;
258 }
259 }
260
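/*
 * Each entry appears to list, in order: desktop vs. mobile, DDR3 vs. DDR2,
 * FSB frequency, memory frequency, and then four latencies in ns as consumed
 * by pineview_update_wm(): display SR, display SR with HPLL off, cursor SR,
 * cursor SR with HPLL off. The ordering of the latency columns is inferred
 * from the struct cxsr_latency users below, not from Bspec.
 */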
261 static const struct cxsr_latency cxsr_latency_table[] = {
262 {1, 0, 800, 400, 3382, 33382, 3983, 33983}, /* DDR2-400 SC */
263 {1, 0, 800, 667, 3354, 33354, 3807, 33807}, /* DDR2-667 SC */
264 {1, 0, 800, 800, 3347, 33347, 3763, 33763}, /* DDR2-800 SC */
265 {1, 1, 800, 667, 6420, 36420, 6873, 36873}, /* DDR3-667 SC */
266 {1, 1, 800, 800, 5902, 35902, 6318, 36318}, /* DDR3-800 SC */
267
268 {1, 0, 667, 400, 3400, 33400, 4021, 34021}, /* DDR2-400 SC */
269 {1, 0, 667, 667, 3372, 33372, 3845, 33845}, /* DDR2-667 SC */
270 {1, 0, 667, 800, 3386, 33386, 3822, 33822}, /* DDR2-800 SC */
271 {1, 1, 667, 667, 6438, 36438, 6911, 36911}, /* DDR3-667 SC */
272 {1, 1, 667, 800, 5941, 35941, 6377, 36377}, /* DDR3-800 SC */
273
274 {1, 0, 400, 400, 3472, 33472, 4173, 34173}, /* DDR2-400 SC */
275 {1, 0, 400, 667, 3443, 33443, 3996, 33996}, /* DDR2-667 SC */
276 {1, 0, 400, 800, 3430, 33430, 3946, 33946}, /* DDR2-800 SC */
277 {1, 1, 400, 667, 6509, 36509, 7062, 37062}, /* DDR3-667 SC */
278 {1, 1, 400, 800, 5985, 35985, 6501, 36501}, /* DDR3-800 SC */
279
280 {0, 0, 800, 400, 3438, 33438, 4065, 34065}, /* DDR2-400 SC */
281 {0, 0, 800, 667, 3410, 33410, 3889, 33889}, /* DDR2-667 SC */
282 {0, 0, 800, 800, 3403, 33403, 3845, 33845}, /* DDR2-800 SC */
283 {0, 1, 800, 667, 6476, 36476, 6955, 36955}, /* DDR3-667 SC */
284 {0, 1, 800, 800, 5958, 35958, 6400, 36400}, /* DDR3-800 SC */
285
286 {0, 0, 667, 400, 3456, 33456, 4103, 34106}, /* DDR2-400 SC */
287 {0, 0, 667, 667, 3428, 33428, 3927, 33927}, /* DDR2-667 SC */
288 {0, 0, 667, 800, 3443, 33443, 3905, 33905}, /* DDR2-800 SC */
289 {0, 1, 667, 667, 6494, 36494, 6993, 36993}, /* DDR3-667 SC */
290 {0, 1, 667, 800, 5998, 35998, 6460, 36460}, /* DDR3-800 SC */
291
292 {0, 0, 400, 400, 3528, 33528, 4255, 34255}, /* DDR2-400 SC */
293 {0, 0, 400, 667, 3500, 33500, 4079, 34079}, /* DDR2-667 SC */
294 {0, 0, 400, 800, 3487, 33487, 4029, 34029}, /* DDR2-800 SC */
295 {0, 1, 400, 667, 6566, 36566, 7145, 37145}, /* DDR3-667 SC */
296 {0, 1, 400, 800, 6042, 36042, 6584, 36584}, /* DDR3-800 SC */
297 };
298
299 static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
300 bool is_ddr3,
301 int fsb,
302 int mem)
303 {
304 const struct cxsr_latency *latency;
305 int i;
306
307 if (fsb == 0 || mem == 0)
308 return NULL;
309
310 for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
311 latency = &cxsr_latency_table[i];
312 if (is_desktop == latency->is_desktop &&
313 is_ddr3 == latency->is_ddr3 &&
314 fsb == latency->fsb_freq && mem == latency->mem_freq)
315 return latency;
316 }
317
318 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
319
320 return NULL;
321 }
322
323 static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
324 {
325 u32 val;
326
327 mutex_lock(&dev_priv->pcu_lock);
328
329 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
330 if (enable)
331 val &= ~FORCE_DDR_HIGH_FREQ;
332 else
333 val |= FORCE_DDR_HIGH_FREQ;
334 val &= ~FORCE_DDR_LOW_FREQ;
335 val |= FORCE_DDR_FREQ_REQ_ACK;
336 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
337
338 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
339 FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
340 DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
341
342 mutex_unlock(&dev_priv->pcu_lock);
343 }
344
345 static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
346 {
347 u32 val;
348
349 mutex_lock(&dev_priv->pcu_lock);
350
351 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
352 if (enable)
353 val |= DSP_MAXFIFO_PM5_ENABLE;
354 else
355 val &= ~DSP_MAXFIFO_PM5_ENABLE;
356 vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
357
358 mutex_unlock(&dev_priv->pcu_lock);
359 }
360
361 #define FW_WM(value, plane) \
362 (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
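/*
 * Illustrative expansion: FW_WM(0x15, SR) becomes
 * ((0x15 << DSPFW_SR_SHIFT) & DSPFW_SR_MASK), i.e. the watermark value is
 * shifted into the SR field of the register and masked to that field's width.
 */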
363
364 static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
365 {
366 bool was_enabled;
367 u32 val;
368
369 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
370 was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
371 I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
372 POSTING_READ(FW_BLC_SELF_VLV);
373 } else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
374 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
375 I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
376 POSTING_READ(FW_BLC_SELF);
377 } else if (IS_PINEVIEW(dev_priv)) {
378 val = I915_READ(DSPFW3);
379 was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
380 if (enable)
381 val |= PINEVIEW_SELF_REFRESH_EN;
382 else
383 val &= ~PINEVIEW_SELF_REFRESH_EN;
384 I915_WRITE(DSPFW3, val);
385 POSTING_READ(DSPFW3);
386 } else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
387 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
388 val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
389 _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
390 I915_WRITE(FW_BLC_SELF, val);
391 POSTING_READ(FW_BLC_SELF);
392 } else if (IS_I915GM(dev_priv)) {
393 /*
394 * FIXME can't find a bit like this for 915G,
395 * and yet it does have the related watermark in
396 * FW_BLC_SELF. What's going on?
397 */
398 was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
399 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
400 _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
401 I915_WRITE(INSTPM, val);
402 POSTING_READ(INSTPM);
403 } else {
404 return false;
405 }
406
407 trace_intel_memory_cxsr(dev_priv, was_enabled, enable);
408
409 DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
410 enableddisabled(enable),
411 enableddisabled(was_enabled));
412
413 return was_enabled;
414 }
415
416 /**
417 * intel_set_memory_cxsr - Configure CxSR state
418 * @dev_priv: i915 device
419 * @enable: Allow vs. disallow CxSR
420 *
421 * Allow or disallow the system to enter a special CxSR
422 * (C-state self refresh) state. What typically happens in CxSR mode
423 * is that several display FIFOs may get combined into a single larger
424 * FIFO for a particular plane (so called max FIFO mode) to allow the
425 * system to defer memory fetches longer, and the memory will enter
426 * self refresh.
427 *
428 * Note that enabling CxSR does not guarantee that the system enters
429 * this special mode, nor does it guarantee that the system stays
430 * in that mode once entered. So this just allows/disallows the system
431 * to autonomously utilize the CxSR mode. Other factors such as core
432 * C-states will affect when/if the system actually enters/exits the
433 * CxSR mode.
434 *
435 * Note that on VLV/CHV this actually only controls the max FIFO mode,
436 * and the system is free to enter/exit memory self refresh at any time
437 * even when the use of CxSR has been disallowed.
438 *
439 * While the system is actually in the CxSR/max FIFO mode, some plane
440 * control registers will not get latched on vblank. Thus in order to
441 * guarantee the system will respond to changes in the plane registers
442 * we must always disallow CxSR prior to making changes to those registers.
443 * Unfortunately the system will re-evaluate the CxSR conditions at
444 * frame start which happens after vblank start (which is when the plane
445 * registers would get latched), so we can't proceed with the plane update
446 * during the same frame where we disallowed CxSR.
447 *
448 * Certain platforms also have a deeper HPLL SR mode. Fortunately the
449 * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
450 * the hardware w.r.t. HPLL SR when writing to plane registers.
451 * Disallowing just CxSR is sufficient.
452 */
453 bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
454 {
455 bool ret;
456
457 mutex_lock(&dev_priv->wm.wm_mutex);
458 ret = _intel_set_memory_cxsr(dev_priv, enable);
459 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
460 dev_priv->wm.vlv.cxsr = enable;
461 else if (IS_G4X(dev_priv))
462 dev_priv->wm.g4x.cxsr = enable;
463 mutex_unlock(&dev_priv->wm.wm_mutex);
464
465 return ret;
466 }
467
468 /*
469 * Latency for FIFO fetches is dependent on several factors:
470 * - memory configuration (speed, channels)
471 * - chipset
472 * - current MCH state
473 * It can be fairly high in some situations, so here we assume a fairly
474 * pessimal value. It's a tradeoff between extra memory fetches (if we
475 * set this value too high, the FIFO will fetch frequently to stay full)
476 * and power consumption (set it too low to save power and we might see
477 * FIFO underruns and display "flicker").
478 *
479 * A value of 5us seems to be a good balance; safe for very low end
480 * platforms but not overly aggressive on lower latency configs.
481 */
482 static const int pessimal_latency_ns = 5000;
483
484 #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
485 ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
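/*
 * Illustrative expansion: VLV_FIFO_START(dsparb, dsparb2, 8, 4) becomes
 * ((dsparb >> 8) & 0xff) | (((dsparb2 >> 4) & 0x1) << 8), i.e. the low
 * eight bits of the FIFO start point come from DSPARB and the ninth bit
 * from DSPARB2, as in the PIPE_A sprite1 case below.
 */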
486
487 static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
488 {
489 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
490 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
491 struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
492 enum pipe pipe = crtc->pipe;
493 int sprite0_start, sprite1_start;
494
495 switch (pipe) {
496 uint32_t dsparb, dsparb2, dsparb3;
497 case PIPE_A:
498 dsparb = I915_READ(DSPARB);
499 dsparb2 = I915_READ(DSPARB2);
500 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
501 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
502 break;
503 case PIPE_B:
504 dsparb = I915_READ(DSPARB);
505 dsparb2 = I915_READ(DSPARB2);
506 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
507 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
508 break;
509 case PIPE_C:
510 dsparb2 = I915_READ(DSPARB2);
511 dsparb3 = I915_READ(DSPARB3);
512 sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
513 sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
514 break;
515 default:
516 MISSING_CASE(pipe);
517 return;
518 }
519
520 fifo_state->plane[PLANE_PRIMARY] = sprite0_start;
521 fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
522 fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
523 fifo_state->plane[PLANE_CURSOR] = 63;
524 }
525
526 static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
527 {
528 uint32_t dsparb = I915_READ(DSPARB);
529 int size;
530
531 size = dsparb & 0x7f;
532 if (plane)
533 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
534
535 DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
536 plane ? "B" : "A", size);
537
538 return size;
539 }
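/*
 * Hypothetical example: if DSPARB reads back with the low field at 48 and
 * the C-start field at 96, plane A is reported as 48 FIFO entries and
 * plane B as 96 - 48 = 48 entries.
 */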
540
541 static int i830_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
542 {
543 uint32_t dsparb = I915_READ(DSPARB);
544 int size;
545
546 size = dsparb & 0x1ff;
547 if (plane)
548 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
549 size >>= 1; /* Convert to cachelines */
550
551 DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
552 plane ? "B" : "A", size);
553
554 return size;
555 }
556
557 static int i845_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
558 {
559 uint32_t dsparb = I915_READ(DSPARB);
560 int size;
561
562 size = dsparb & 0x7f;
563 size >>= 2; /* Convert to cachelines */
564
565 DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
566 plane ? "B" : "A",
567 size);
568
569 return size;
570 }
571
572 /* Pineview has different values for various configs */
573 static const struct intel_watermark_params pineview_display_wm = {
574 .fifo_size = PINEVIEW_DISPLAY_FIFO,
575 .max_wm = PINEVIEW_MAX_WM,
576 .default_wm = PINEVIEW_DFT_WM,
577 .guard_size = PINEVIEW_GUARD_WM,
578 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
579 };
580 static const struct intel_watermark_params pineview_display_hplloff_wm = {
581 .fifo_size = PINEVIEW_DISPLAY_FIFO,
582 .max_wm = PINEVIEW_MAX_WM,
583 .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
584 .guard_size = PINEVIEW_GUARD_WM,
585 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
586 };
587 static const struct intel_watermark_params pineview_cursor_wm = {
588 .fifo_size = PINEVIEW_CURSOR_FIFO,
589 .max_wm = PINEVIEW_CURSOR_MAX_WM,
590 .default_wm = PINEVIEW_CURSOR_DFT_WM,
591 .guard_size = PINEVIEW_CURSOR_GUARD_WM,
592 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
593 };
594 static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
595 .fifo_size = PINEVIEW_CURSOR_FIFO,
596 .max_wm = PINEVIEW_CURSOR_MAX_WM,
597 .default_wm = PINEVIEW_CURSOR_DFT_WM,
598 .guard_size = PINEVIEW_CURSOR_GUARD_WM,
599 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
600 };
601 static const struct intel_watermark_params i965_cursor_wm_info = {
602 .fifo_size = I965_CURSOR_FIFO,
603 .max_wm = I965_CURSOR_MAX_WM,
604 .default_wm = I965_CURSOR_DFT_WM,
605 .guard_size = 2,
606 .cacheline_size = I915_FIFO_LINE_SIZE,
607 };
608 static const struct intel_watermark_params i945_wm_info = {
609 .fifo_size = I945_FIFO_SIZE,
610 .max_wm = I915_MAX_WM,
611 .default_wm = 1,
612 .guard_size = 2,
613 .cacheline_size = I915_FIFO_LINE_SIZE,
614 };
615 static const struct intel_watermark_params i915_wm_info = {
616 .fifo_size = I915_FIFO_SIZE,
617 .max_wm = I915_MAX_WM,
618 .default_wm = 1,
619 .guard_size = 2,
620 .cacheline_size = I915_FIFO_LINE_SIZE,
621 };
622 static const struct intel_watermark_params i830_a_wm_info = {
623 .fifo_size = I855GM_FIFO_SIZE,
624 .max_wm = I915_MAX_WM,
625 .default_wm = 1,
626 .guard_size = 2,
627 .cacheline_size = I830_FIFO_LINE_SIZE,
628 };
629 static const struct intel_watermark_params i830_bc_wm_info = {
630 .fifo_size = I855GM_FIFO_SIZE,
631 .max_wm = I915_MAX_WM/2,
632 .default_wm = 1,
633 .guard_size = 2,
634 .cacheline_size = I830_FIFO_LINE_SIZE,
635 };
636 static const struct intel_watermark_params i845_wm_info = {
637 .fifo_size = I830_FIFO_SIZE,
638 .max_wm = I915_MAX_WM,
639 .default_wm = 1,
640 .guard_size = 2,
641 .cacheline_size = I830_FIFO_LINE_SIZE,
642 };
643
644 /**
645 * intel_wm_method1 - Method 1 / "small buffer" watermark formula
646 * @pixel_rate: Pipe pixel rate in kHz
647 * @cpp: Plane bytes per pixel
648 * @latency: Memory wakeup latency in 0.1us units
649 *
650 * Compute the watermark using the method 1 or "small buffer"
651 * formula. The caller may additionally add extra cachelines
652 * to account for TLB misses and clock crossings.
653 *
654 * This method is concerned with the short term drain rate
655 * of the FIFO, ie. it does not account for blanking periods
656 * which would effectively reduce the average drain rate across
657 * a longer period. The name "small" refers to the fact the
658 * FIFO is relatively small compared to the amount of data
659 * fetched.
660 *
661 * The FIFO level vs. time graph might look something like:
662 *
663 * |\ |\
664 * | \ | \
665 * __---__---__ (- plane active, _ blanking)
666 * -> time
667 *
668 * or perhaps like this:
669 *
670 * |\|\ |\|\
671 * __----__----__ (- plane active, _ blanking)
672 * -> time
673 *
674 * Returns:
675 * The watermark in bytes
676 */
677 static unsigned int intel_wm_method1(unsigned int pixel_rate,
678 unsigned int cpp,
679 unsigned int latency)
680 {
681 uint64_t ret;
682
683 ret = (uint64_t) pixel_rate * cpp * latency;
684 ret = DIV_ROUND_UP_ULL(ret, 10000);
685
686 return ret;
687 }
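/*
 * A hypothetical worked example (illustrative numbers, not from Bspec):
 * pixel_rate = 148500 kHz, cpp = 4 and latency = 50 (5 usec in 0.1 usec
 * units) give 148500 * 4 * 50 / 10000 = 2970 bytes needed to cover the
 * wakeup latency at the short term drain rate.
 */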
688
689 /**
690 * intel_wm_method2 - Method 2 / "large buffer" watermark formula
691 * @pixel_rate: Pipe pixel rate in kHz
692 * @htotal: Pipe horizontal total
693 * @width: Plane width in pixels
694 * @cpp: Plane bytes per pixel
695 * @latency: Memory wakeup latency in 0.1us units
696 *
697 * Compute the watermark using the method 2 or "large buffer"
698 * formula. The caller may additionally add extra cachelines
699 * to account for TLB misses and clock crossings.
700 *
701 * This method is concerned with the long term drain rate
702 * of the FIFO, ie. it does account for blanking periods
703 * which effectively reduce the average drain rate across
704 * a longer period. The name "large" refers to the fact the
705 * FIFO is relatively large compared to the amount of data
706 * fetched.
707 *
708 * The FIFO level vs. time graph might look something like:
709 *
710 * |\___ |\___
711 * | \___ | \___
712 * | \ | \
713 * __ --__--__--__--__--__--__ (- plane active, _ blanking)
714 * -> time
715 *
716 * Returns:
717 * The watermark in bytes
718 */
719 static unsigned int intel_wm_method2(unsigned int pixel_rate,
720 unsigned int htotal,
721 unsigned int width,
722 unsigned int cpp,
723 unsigned int latency)
724 {
725 unsigned int ret;
726
727 /*
728 * FIXME remove once all users are computing
729 * watermarks in the correct place.
730 */
731 if (WARN_ON_ONCE(htotal == 0))
732 htotal = 1;
733
734 ret = (latency * pixel_rate) / (htotal * 10000);
735 ret = (ret + 1) * width * cpp;
736
737 return ret;
738 }
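/*
 * A hypothetical worked example (illustrative numbers, not from Bspec):
 * latency = 120 (12 usec), pixel_rate = 148500 kHz, htotal = 2200,
 * width = 1920, cpp = 4. The number of whole lines elapsing during the
 * latency is 120 * 148500 / (2200 * 10000) = 0 in integer math, so the
 * watermark is (0 + 1) * 1920 * 4 = 7680 bytes, i.e. one full line of data.
 */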
739
740 /**
741 * intel_calculate_wm - calculate watermark level
742 * @pixel_rate: pixel clock
743 * @wm: chip FIFO params
744 * @cpp: bytes per pixel
745 * @latency_ns: memory latency for the platform
746 *
747 * Calculate the watermark level (the level at which the display plane will
748 * start fetching from memory again). Each chip has a different display
749 * FIFO size and allocation, so the caller needs to figure that out and pass
750 * in the correct intel_watermark_params structure.
751 *
752 * As the pixel clock runs, the FIFO will be drained at a rate that depends
753 * on the pixel size. When it reaches the watermark level, it'll start
754 * fetching FIFO line-sized chunks from memory until the FIFO fills
755 * past the watermark point. If the FIFO drains completely, a FIFO underrun
756 * will occur, and a display engine hang could result.
757 */
758 static unsigned int intel_calculate_wm(int pixel_rate,
759 const struct intel_watermark_params *wm,
760 int fifo_size, int cpp,
761 unsigned int latency_ns)
762 {
763 int entries, wm_size;
764
765 /*
766 * Note: we need to make sure we don't overflow for various clock &
767 * latency values.
768 * clocks go from a few thousand to several hundred thousand.
769 * latency is usually a few thousand
770 */
771 entries = intel_wm_method1(pixel_rate, cpp,
772 latency_ns / 100);
773 entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
774 wm->guard_size;
775 DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);
776
777 wm_size = fifo_size - entries;
778 DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);
779
780 /* Don't promote wm_size to unsigned... */
781 if (wm_size > wm->max_wm)
782 wm_size = wm->max_wm;
783 if (wm_size <= 0)
784 wm_size = wm->default_wm;
785
786 /*
787 * Bspec seems to indicate that the value shouldn't be lower than
788 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
789 * Let's go for 8, which is the burst size, since certain platforms
790 * already use a hardcoded 8 (which is what the spec says should be
791 * done).
792 */
793 if (wm_size <= 8)
794 wm_size = 8;
795
796 return wm_size;
797 }
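/*
 * A hypothetical worked example (illustrative numbers, assuming a FIFO of
 * 96 cachelines with the i915_wm_info parameters): pixel_rate = 100000 kHz,
 * cpp = 4, latency_ns = 5000. Method 1 gives 100000 * 4 * 50 / 10000 =
 * 2000 bytes, DIV_ROUND_UP(2000, 64) + 2 = 34 entries, so the returned
 * watermark level is 96 - 34 = 62 cachelines.
 */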
798
799 static bool is_disabling(int old, int new, int threshold)
800 {
801 return old >= threshold && new < threshold;
802 }
803
804 static bool is_enabling(int old, int new, int threshold)
805 {
806 return old < threshold && new >= threshold;
807 }
808
809 static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
810 {
811 return dev_priv->wm.max_level + 1;
812 }
813
814 static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
815 const struct intel_plane_state *plane_state)
816 {
817 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
818
819 /* FIXME check the 'enable' instead */
820 if (!crtc_state->base.active)
821 return false;
822
823 /*
824 * Treat cursor with fb as always visible since cursor updates
825 * can happen faster than the vrefresh rate, and the current
826 * watermark code doesn't handle that correctly. Cursor updates
827 * which set/clear the fb or change the cursor size are going
828 * to get throttled by intel_legacy_cursor_update() to work
829 * around this problem with the watermark code.
830 */
831 if (plane->id == PLANE_CURSOR)
832 return plane_state->base.fb != NULL;
833 else
834 return plane_state->base.visible;
835 }
836
837 static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
838 {
839 struct intel_crtc *crtc, *enabled = NULL;
840
841 for_each_intel_crtc(&dev_priv->drm, crtc) {
842 if (intel_crtc_active(crtc)) {
843 if (enabled)
844 return NULL;
845 enabled = crtc;
846 }
847 }
848
849 return enabled;
850 }
851
852 static void pineview_update_wm(struct intel_crtc *unused_crtc)
853 {
854 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
855 struct intel_crtc *crtc;
856 const struct cxsr_latency *latency;
857 u32 reg;
858 unsigned int wm;
859
860 latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
861 dev_priv->is_ddr3,
862 dev_priv->fsb_freq,
863 dev_priv->mem_freq);
864 if (!latency) {
865 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
866 intel_set_memory_cxsr(dev_priv, false);
867 return;
868 }
869
870 crtc = single_enabled_crtc(dev_priv);
871 if (crtc) {
872 const struct drm_display_mode *adjusted_mode =
873 &crtc->config->base.adjusted_mode;
874 const struct drm_framebuffer *fb =
875 crtc->base.primary->state->fb;
876 int cpp = fb->format->cpp[0];
877 int clock = adjusted_mode->crtc_clock;
878
879 /* Display SR */
880 wm = intel_calculate_wm(clock, &pineview_display_wm,
881 pineview_display_wm.fifo_size,
882 cpp, latency->display_sr);
883 reg = I915_READ(DSPFW1);
884 reg &= ~DSPFW_SR_MASK;
885 reg |= FW_WM(wm, SR);
886 I915_WRITE(DSPFW1, reg);
887 DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
888
889 /* cursor SR */
890 wm = intel_calculate_wm(clock, &pineview_cursor_wm,
891 pineview_display_wm.fifo_size,
892 4, latency->cursor_sr);
893 reg = I915_READ(DSPFW3);
894 reg &= ~DSPFW_CURSOR_SR_MASK;
895 reg |= FW_WM(wm, CURSOR_SR);
896 I915_WRITE(DSPFW3, reg);
897
898 /* Display HPLL off SR */
899 wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
900 pineview_display_hplloff_wm.fifo_size,
901 cpp, latency->display_hpll_disable);
902 reg = I915_READ(DSPFW3);
903 reg &= ~DSPFW_HPLL_SR_MASK;
904 reg |= FW_WM(wm, HPLL_SR);
905 I915_WRITE(DSPFW3, reg);
906
907 /* cursor HPLL off SR */
908 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
909 pineview_display_hplloff_wm.fifo_size,
910 4, latency->cursor_hpll_disable);
911 reg = I915_READ(DSPFW3);
912 reg &= ~DSPFW_HPLL_CURSOR_MASK;
913 reg |= FW_WM(wm, HPLL_CURSOR);
914 I915_WRITE(DSPFW3, reg);
915 DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
916
917 intel_set_memory_cxsr(dev_priv, true);
918 } else {
919 intel_set_memory_cxsr(dev_priv, false);
920 }
921 }
922
923 /*
924 * Documentation says:
925 * "If the line size is small, the TLB fetches can get in the way of the
926 * data fetches, causing some lag in the pixel data return which is not
927 * accounted for in the above formulas. The following adjustment only
928 * needs to be applied if eight whole lines fit in the buffer at once.
929 * The WM is adjusted upwards by the difference between the FIFO size
930 * and the size of 8 whole lines. This adjustment is always performed
931 * in the actual pixel depth regardless of whether FBC is enabled or not."
932 */
933 static int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
934 {
935 int tlb_miss = fifo_size * 64 - width * cpp * 8;
936
937 return max(0, tlb_miss);
938 }
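/*
 * Illustrative numbers: with a 511 cacheline (511 * 64 = 32704 byte) FIFO,
 * a 1920 pixel wide 4 bpp plane needs 1920 * 4 * 8 = 61440 bytes for eight
 * whole lines, which does not fit, so no adjustment is applied. A 640 pixel
 * wide plane would get a 32704 - 640 * 4 * 8 = 12224 byte adjustment.
 */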
939
940 static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
941 const struct g4x_wm_values *wm)
942 {
943 enum pipe pipe;
944
945 for_each_pipe(dev_priv, pipe)
946 trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
947
948 I915_WRITE(DSPFW1,
949 FW_WM(wm->sr.plane, SR) |
950 FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
951 FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
952 FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
953 I915_WRITE(DSPFW2,
954 (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
955 FW_WM(wm->sr.fbc, FBC_SR) |
956 FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
957 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
958 FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
959 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
960 I915_WRITE(DSPFW3,
961 (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
962 FW_WM(wm->sr.cursor, CURSOR_SR) |
963 FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
964 FW_WM(wm->hpll.plane, HPLL_SR));
965
966 POSTING_READ(DSPFW1);
967 }
968
969 #define FW_WM_VLV(value, plane) \
970 (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
971
972 static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
973 const struct vlv_wm_values *wm)
974 {
975 enum pipe pipe;
976
977 for_each_pipe(dev_priv, pipe) {
978 trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
979
980 I915_WRITE(VLV_DDL(pipe),
981 (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
982 (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
983 (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
984 (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
985 }
986
987 /*
988 * Zero the (unused) WM1 watermarks, and also clear all the
989 * high order bits so that there are no out of bounds values
990 * present in the registers during the reprogramming.
991 */
992 I915_WRITE(DSPHOWM, 0);
993 I915_WRITE(DSPHOWM1, 0);
994 I915_WRITE(DSPFW4, 0);
995 I915_WRITE(DSPFW5, 0);
996 I915_WRITE(DSPFW6, 0);
997
998 I915_WRITE(DSPFW1,
999 FW_WM(wm->sr.plane, SR) |
1000 FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
1001 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
1002 FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
1003 I915_WRITE(DSPFW2,
1004 FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
1005 FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
1006 FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
1007 I915_WRITE(DSPFW3,
1008 FW_WM(wm->sr.cursor, CURSOR_SR));
1009
1010 if (IS_CHERRYVIEW(dev_priv)) {
1011 I915_WRITE(DSPFW7_CHV,
1012 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1013 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
1014 I915_WRITE(DSPFW8_CHV,
1015 FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
1016 FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
1017 I915_WRITE(DSPFW9_CHV,
1018 FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
1019 FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
1020 I915_WRITE(DSPHOWM,
1021 FW_WM(wm->sr.plane >> 9, SR_HI) |
1022 FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
1023 FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
1024 FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
1025 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1026 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1027 FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1028 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1029 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1030 FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
1031 } else {
1032 I915_WRITE(DSPFW7,
1033 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1034 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
1035 I915_WRITE(DSPHOWM,
1036 FW_WM(wm->sr.plane >> 9, SR_HI) |
1037 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1038 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1039 FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1040 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1041 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1042 FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
1043 }
1044
1045 POSTING_READ(DSPFW1);
1046 }
1047
1048 #undef FW_WM_VLV
1049
1050 static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
1051 {
1052 /* all latencies in usec */
1053 dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
1054 dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
1055 dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;
1056
1057 dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
1058 }
1059
1060 static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
1061 {
1062 /*
1063 * DSPCNTR[13] supposedly controls whether the
1064 * primary plane can use the FIFO space otherwise
1065 * reserved for the sprite plane. It's not 100% clear
1066 * what the actual FIFO size is, but it looks like we
1067 * can happily set both primary and sprite watermarks
1068 * up to 127 cachelines. So that would seem to mean
1069 * that either DSPCNTR[13] doesn't do anything, or that
1070 * the total FIFO is >= 256 cachelines in size. Either
1071 * way, we don't seem to have to worry about this
1072 * repartitioning as the maximum watermark value the
1073 * register can hold for each plane is lower than the
1074 * minimum FIFO size.
1075 */
1076 switch (plane_id) {
1077 case PLANE_CURSOR:
1078 return 63;
1079 case PLANE_PRIMARY:
1080 return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
1081 case PLANE_SPRITE0:
1082 return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
1083 default:
1084 MISSING_CASE(plane_id);
1085 return 0;
1086 }
1087 }
1088
1089 static int g4x_fbc_fifo_size(int level)
1090 {
1091 switch (level) {
1092 case G4X_WM_LEVEL_SR:
1093 return 7;
1094 case G4X_WM_LEVEL_HPLL:
1095 return 15;
1096 default:
1097 MISSING_CASE(level);
1098 return 0;
1099 }
1100 }
1101
1102 static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state,
1103 const struct intel_plane_state *plane_state,
1104 int level)
1105 {
1106 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1107 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1108 const struct drm_display_mode *adjusted_mode =
1109 &crtc_state->base.adjusted_mode;
1110 int clock, htotal, cpp, width, wm;
1111 int latency = dev_priv->wm.pri_latency[level] * 10;
1112
1113 if (latency == 0)
1114 return USHRT_MAX;
1115
1116 if (!intel_wm_plane_visible(crtc_state, plane_state))
1117 return 0;
1118
1119 /*
1120 * Not 100% sure which way ELK should go here as the
1121 * spec only says CL/CTG should assume 32bpp and BW
1122 * doesn't need to. But as these things followed the
1123 * mobile vs. desktop lines on gen3 as well, let's
1124 * assume ELK doesn't need this.
1125 *
1126 * The spec also fails to list such a restriction for
1127 * the HPLL watermark, which seems a little strange.
1128 * Let's use 32bpp for the HPLL watermark as well.
1129 */
1130 if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
1131 level != G4X_WM_LEVEL_NORMAL)
1132 cpp = 4;
1133 else
1134 cpp = plane_state->base.fb->format->cpp[0];
1135
1136 clock = adjusted_mode->crtc_clock;
1137 htotal = adjusted_mode->crtc_htotal;
1138
1139 if (plane->id == PLANE_CURSOR)
1140 width = plane_state->base.crtc_w;
1141 else
1142 width = drm_rect_width(&plane_state->base.dst);
1143
1144 if (plane->id == PLANE_CURSOR) {
1145 wm = intel_wm_method2(clock, htotal, width, cpp, latency);
1146 } else if (plane->id == PLANE_PRIMARY &&
1147 level == G4X_WM_LEVEL_NORMAL) {
1148 wm = intel_wm_method1(clock, cpp, latency);
1149 } else {
1150 int small, large;
1151
1152 small = intel_wm_method1(clock, cpp, latency);
1153 large = intel_wm_method2(clock, htotal, width, cpp, latency);
1154
1155 wm = min(small, large);
1156 }
1157
1158 wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
1159 width, cpp);
1160
1161 wm = DIV_ROUND_UP(wm, 64) + 2;
1162
1163 return min_t(int, wm, USHRT_MAX);
1164 }
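/*
 * Illustrative conversion (hypothetical numbers): a 2970 byte method 1
 * result with a zero TLB adjustment becomes DIV_ROUND_UP(2970, 64) + 2 =
 * 49 cachelines.
 */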
1165
1166 static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1167 int level, enum plane_id plane_id, u16 value)
1168 {
1169 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1170 bool dirty = false;
1171
1172 for (; level < intel_wm_num_levels(dev_priv); level++) {
1173 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1174
1175 dirty |= raw->plane[plane_id] != value;
1176 raw->plane[plane_id] = value;
1177 }
1178
1179 return dirty;
1180 }
1181
1182 static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
1183 int level, u16 value)
1184 {
1185 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1186 bool dirty = false;
1187
1188 /* NORMAL level doesn't have an FBC watermark */
1189 level = max(level, G4X_WM_LEVEL_SR);
1190
1191 for (; level < intel_wm_num_levels(dev_priv); level++) {
1192 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1193
1194 dirty |= raw->fbc != value;
1195 raw->fbc = value;
1196 }
1197
1198 return dirty;
1199 }
1200
1201 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1202 const struct intel_plane_state *pstate,
1203 uint32_t pri_val);
1204
1205 static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1206 const struct intel_plane_state *plane_state)
1207 {
1208 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1209 int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1210 enum plane_id plane_id = plane->id;
1211 bool dirty = false;
1212 int level;
1213
1214 if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1215 dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1216 if (plane_id == PLANE_PRIMARY)
1217 dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
1218 goto out;
1219 }
1220
1221 for (level = 0; level < num_levels; level++) {
1222 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1223 int wm, max_wm;
1224
1225 wm = g4x_compute_wm(crtc_state, plane_state, level);
1226 max_wm = g4x_plane_fifo_size(plane_id, level);
1227
1228 if (wm > max_wm)
1229 break;
1230
1231 dirty |= raw->plane[plane_id] != wm;
1232 raw->plane[plane_id] = wm;
1233
1234 if (plane_id != PLANE_PRIMARY ||
1235 level == G4X_WM_LEVEL_NORMAL)
1236 continue;
1237
1238 wm = ilk_compute_fbc_wm(crtc_state, plane_state,
1239 raw->plane[plane_id]);
1240 max_wm = g4x_fbc_fifo_size(level);
1241
1242 /*
1243 * FBC wm is not mandatory as we
1244 * can always just disable its use.
1245 */
1246 if (wm > max_wm)
1247 wm = USHRT_MAX;
1248
1249 dirty |= raw->fbc != wm;
1250 raw->fbc = wm;
1251 }
1252
1253 /* mark watermarks as invalid */
1254 dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1255
1256 if (plane_id == PLANE_PRIMARY)
1257 dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
1258
1259 out:
1260 if (dirty) {
1261 DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
1262 plane->base.name,
1263 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
1264 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
1265 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
1266
1267 if (plane_id == PLANE_PRIMARY)
1268 DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
1269 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
1270 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
1271 }
1272
1273 return dirty;
1274 }
1275
1276 static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1277 enum plane_id plane_id, int level)
1278 {
1279 const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1280
1281 return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
1282 }
1283
1284 static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
1285 int level)
1286 {
1287 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1288
1289 if (level > dev_priv->wm.max_level)
1290 return false;
1291
1292 return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1293 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1294 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1295 }
1296
1297 /* mark all levels starting from 'level' as invalid */
1298 static void g4x_invalidate_wms(struct intel_crtc *crtc,
1299 struct g4x_wm_state *wm_state, int level)
1300 {
1301 if (level <= G4X_WM_LEVEL_NORMAL) {
1302 enum plane_id plane_id;
1303
1304 for_each_plane_id_on_crtc(crtc, plane_id)
1305 wm_state->wm.plane[plane_id] = USHRT_MAX;
1306 }
1307
1308 if (level <= G4X_WM_LEVEL_SR) {
1309 wm_state->cxsr = false;
1310 wm_state->sr.cursor = USHRT_MAX;
1311 wm_state->sr.plane = USHRT_MAX;
1312 wm_state->sr.fbc = USHRT_MAX;
1313 }
1314
1315 if (level <= G4X_WM_LEVEL_HPLL) {
1316 wm_state->hpll_en = false;
1317 wm_state->hpll.cursor = USHRT_MAX;
1318 wm_state->hpll.plane = USHRT_MAX;
1319 wm_state->hpll.fbc = USHRT_MAX;
1320 }
1321 }
1322
1323 static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1324 {
1325 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1326 struct intel_atomic_state *state =
1327 to_intel_atomic_state(crtc_state->base.state);
1328 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
1329 int num_active_planes = hweight32(crtc_state->active_planes &
1330 ~BIT(PLANE_CURSOR));
1331 const struct g4x_pipe_wm *raw;
1332 const struct intel_plane_state *old_plane_state;
1333 const struct intel_plane_state *new_plane_state;
1334 struct intel_plane *plane;
1335 enum plane_id plane_id;
1336 int i, level;
1337 unsigned int dirty = 0;
1338
1339 for_each_oldnew_intel_plane_in_state(state, plane,
1340 old_plane_state,
1341 new_plane_state, i) {
1342 if (new_plane_state->base.crtc != &crtc->base &&
1343 old_plane_state->base.crtc != &crtc->base)
1344 continue;
1345
1346 if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state))
1347 dirty |= BIT(plane->id);
1348 }
1349
1350 if (!dirty)
1351 return 0;
1352
1353 level = G4X_WM_LEVEL_NORMAL;
1354 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1355 goto out;
1356
1357 raw = &crtc_state->wm.g4x.raw[level];
1358 for_each_plane_id_on_crtc(crtc, plane_id)
1359 wm_state->wm.plane[plane_id] = raw->plane[plane_id];
1360
1361 level = G4X_WM_LEVEL_SR;
1362
1363 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1364 goto out;
1365
1366 raw = &crtc_state->wm.g4x.raw[level];
1367 wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
1368 wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
1369 wm_state->sr.fbc = raw->fbc;
1370
1371 wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY);
1372
1373 level = G4X_WM_LEVEL_HPLL;
1374
1375 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1376 goto out;
1377
1378 raw = &crtc_state->wm.g4x.raw[level];
1379 wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
1380 wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
1381 wm_state->hpll.fbc = raw->fbc;
1382
1383 wm_state->hpll_en = wm_state->cxsr;
1384
1385 level++;
1386
1387 out:
1388 if (level == G4X_WM_LEVEL_NORMAL)
1389 return -EINVAL;
1390
1391 /* invalidate the higher levels */
1392 g4x_invalidate_wms(crtc, wm_state, level);
1393
1394 /*
1395 * Determine if the FBC watermark(s) can be used. If
1396 * this isn't the case we prefer to disable the FBC
1397 * watermark(s) rather than disable the SR/HPLL
1398 * level(s) entirely.
1399 */
1400 wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;
1401
1402 if (level >= G4X_WM_LEVEL_SR &&
1403 wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
1404 wm_state->fbc_en = false;
1405 else if (level >= G4X_WM_LEVEL_HPLL &&
1406 wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
1407 wm_state->fbc_en = false;
1408
1409 return 0;
1410 }
1411
1412 static int g4x_compute_intermediate_wm(struct drm_device *dev,
1413 struct intel_crtc *crtc,
1414 struct intel_crtc_state *crtc_state)
1415 {
1416 struct g4x_wm_state *intermediate = &crtc_state->wm.g4x.intermediate;
1417 const struct g4x_wm_state *optimal = &crtc_state->wm.g4x.optimal;
1418 const struct g4x_wm_state *active = &crtc->wm.active.g4x;
1419 enum plane_id plane_id;
1420
1421 intermediate->cxsr = optimal->cxsr && active->cxsr &&
1422 !crtc_state->disable_cxsr;
1423 intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
1424 !crtc_state->disable_cxsr;
1425 intermediate->fbc_en = optimal->fbc_en && active->fbc_en;
1426
1427 for_each_plane_id_on_crtc(crtc, plane_id) {
1428 intermediate->wm.plane[plane_id] =
1429 max(optimal->wm.plane[plane_id],
1430 active->wm.plane[plane_id]);
1431
1432 WARN_ON(intermediate->wm.plane[plane_id] >
1433 g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
1434 }
1435
1436 intermediate->sr.plane = max(optimal->sr.plane,
1437 active->sr.plane);
1438 intermediate->sr.cursor = max(optimal->sr.cursor,
1439 active->sr.cursor);
1440 intermediate->sr.fbc = max(optimal->sr.fbc,
1441 active->sr.fbc);
1442
1443 intermediate->hpll.plane = max(optimal->hpll.plane,
1444 active->hpll.plane);
1445 intermediate->hpll.cursor = max(optimal->hpll.cursor,
1446 active->hpll.cursor);
1447 intermediate->hpll.fbc = max(optimal->hpll.fbc,
1448 active->hpll.fbc);
1449
1450 WARN_ON((intermediate->sr.plane >
1451 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
1452 intermediate->sr.cursor >
1453 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
1454 intermediate->cxsr);
1455 WARN_ON((intermediate->sr.plane >
1456 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
1457 intermediate->sr.cursor >
1458 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
1459 intermediate->hpll_en);
1460
1461 WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(1) &&
1462 intermediate->fbc_en && intermediate->cxsr);
1463 WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) &&
1464 intermediate->fbc_en && intermediate->hpll_en);
1465
1466 /*
1467 * If our intermediate WMs are identical to the final WMs, then we can
1468 * omit the post-vblank programming; only update if it's different.
1469 */
1470 if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
1471 crtc_state->wm.need_postvbl_update = true;
1472
1473 return 0;
1474 }
1475
1476 static void g4x_merge_wm(struct drm_i915_private *dev_priv,
1477 struct g4x_wm_values *wm)
1478 {
1479 struct intel_crtc *crtc;
1480 int num_active_crtcs = 0;
1481
1482 wm->cxsr = true;
1483 wm->hpll_en = true;
1484 wm->fbc_en = true;
1485
1486 for_each_intel_crtc(&dev_priv->drm, crtc) {
1487 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1488
1489 if (!crtc->active)
1490 continue;
1491
1492 if (!wm_state->cxsr)
1493 wm->cxsr = false;
1494 if (!wm_state->hpll_en)
1495 wm->hpll_en = false;
1496 if (!wm_state->fbc_en)
1497 wm->fbc_en = false;
1498
1499 num_active_crtcs++;
1500 }
1501
1502 if (num_active_crtcs != 1) {
1503 wm->cxsr = false;
1504 wm->hpll_en = false;
1505 wm->fbc_en = false;
1506 }
1507
1508 for_each_intel_crtc(&dev_priv->drm, crtc) {
1509 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1510 enum pipe pipe = crtc->pipe;
1511
1512 wm->pipe[pipe] = wm_state->wm;
1513 if (crtc->active && wm->cxsr)
1514 wm->sr = wm_state->sr;
1515 if (crtc->active && wm->hpll_en)
1516 wm->hpll = wm_state->hpll;
1517 }
1518 }
1519
1520 static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
1521 {
1522 struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
1523 struct g4x_wm_values new_wm = {};
1524
1525 g4x_merge_wm(dev_priv, &new_wm);
1526
1527 if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
1528 return;
1529
1530 if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
1531 _intel_set_memory_cxsr(dev_priv, false);
1532
1533 g4x_write_wm_values(dev_priv, &new_wm);
1534
1535 if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
1536 _intel_set_memory_cxsr(dev_priv, true);
1537
1538 *old_wm = new_wm;
1539 }
1540
1541 static void g4x_initial_watermarks(struct intel_atomic_state *state,
1542 struct intel_crtc_state *crtc_state)
1543 {
1544 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1545 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1546
1547 mutex_lock(&dev_priv->wm.wm_mutex);
1548 crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
1549 g4x_program_watermarks(dev_priv);
1550 mutex_unlock(&dev_priv->wm.wm_mutex);
1551 }
1552
1553 static void g4x_optimize_watermarks(struct intel_atomic_state *state,
1554 struct intel_crtc_state *crtc_state)
1555 {
1556 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1557 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
1558
1559 if (!crtc_state->wm.need_postvbl_update)
1560 return;
1561
1562 mutex_lock(&dev_priv->wm.wm_mutex);
1563 intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
1564 g4x_program_watermarks(dev_priv);
1565 mutex_unlock(&dev_priv->wm.wm_mutex);
1566 }
1567
1568 /* latency must be in 0.1us units. */
1569 static unsigned int vlv_wm_method2(unsigned int pixel_rate,
1570 unsigned int htotal,
1571 unsigned int width,
1572 unsigned int cpp,
1573 unsigned int latency)
1574 {
1575 unsigned int ret;
1576
1577 ret = intel_wm_method2(pixel_rate, htotal,
1578 width, cpp, latency);
1579 ret = DIV_ROUND_UP(ret, 64);
1580
1581 return ret;
1582 }
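/*
 * Continuing the hypothetical method 2 example above, a 7680 byte result
 * becomes DIV_ROUND_UP(7680, 64) = 120 cachelines.
 */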
1583
1584 static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
1585 {
1586 /* all latencies in usec */
1587 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
1588
1589 dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
1590
1591 if (IS_CHERRYVIEW(dev_priv)) {
1592 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
1593 dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
1594
1595 dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
1596 }
1597 }
1598
1599 static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
1600 const struct intel_plane_state *plane_state,
1601 int level)
1602 {
1603 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1604 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1605 const struct drm_display_mode *adjusted_mode =
1606 &crtc_state->base.adjusted_mode;
1607 int clock, htotal, cpp, width, wm;
1608
1609 if (dev_priv->wm.pri_latency[level] == 0)
1610 return USHRT_MAX;
1611
1612 if (!intel_wm_plane_visible(crtc_state, plane_state))
1613 return 0;
1614
1615 cpp = plane_state->base.fb->format->cpp[0];
1616 clock = adjusted_mode->crtc_clock;
1617 htotal = adjusted_mode->crtc_htotal;
1618 width = crtc_state->pipe_src_w;
1619
1620 if (plane->id == PLANE_CURSOR) {
1621 /*
1622 * FIXME the formula gives values that are
1623 * too big for the cursor FIFO, and hence we
1624 * would never be able to use cursors. For
1625 * now just hardcode the watermark.
1626 */
1627 wm = 63;
1628 } else {
1629 wm = vlv_wm_method2(clock, htotal, width, cpp,
1630 dev_priv->wm.pri_latency[level] * 10);
1631 }
1632
1633 return min_t(int, wm, USHRT_MAX);
1634 }
1635
1636 static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
1637 {
1638 return (active_planes & (BIT(PLANE_SPRITE0) |
1639 BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
1640 }
1641
1642 static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
1643 {
1644 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1645 const struct g4x_pipe_wm *raw =
1646 &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
1647 struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
1648 unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
1649 int num_active_planes = hweight32(active_planes);
1650 const int fifo_size = 511;
1651 int fifo_extra, fifo_left = fifo_size;
1652 int sprite0_fifo_extra = 0;
1653 unsigned int total_rate;
1654 enum plane_id plane_id;
1655
1656 /*
1657 * When enabling sprite0 after sprite1 has already been enabled
1658 * we tend to get an underrun unless sprite0 already has some
1659 * FIFO space allocated. Hence we always allocate at least one
1660 * cacheline for sprite0 whenever sprite1 is enabled.
1661 *
1662 * All other plane enable sequences appear immune to this problem.
1663 */
1664 if (vlv_need_sprite0_fifo_workaround(active_planes))
1665 sprite0_fifo_extra = 1;
1666
1667 total_rate = raw->plane[PLANE_PRIMARY] +
1668 raw->plane[PLANE_SPRITE0] +
1669 raw->plane[PLANE_SPRITE1] +
1670 sprite0_fifo_extra;
1671
1672 if (total_rate > fifo_size)
1673 return -EINVAL;
1674
1675 if (total_rate == 0)
1676 total_rate = 1;
1677
1678 for_each_plane_id_on_crtc(crtc, plane_id) {
1679 unsigned int rate;
1680
1681 if ((active_planes & BIT(plane_id)) == 0) {
1682 fifo_state->plane[plane_id] = 0;
1683 continue;
1684 }
1685
1686 rate = raw->plane[plane_id];
1687 fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
1688 fifo_left -= fifo_state->plane[plane_id];
1689 }
1690
1691 fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
1692 fifo_left -= sprite0_fifo_extra;
1693
1694 fifo_state->plane[PLANE_CURSOR] = 63;
1695
1696 fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);
1697
1698 /* spread the remainder evenly */
1699 for_each_plane_id_on_crtc(crtc, plane_id) {
1700 int plane_extra;
1701
1702 if (fifo_left == 0)
1703 break;
1704
1705 if ((active_planes & BIT(plane_id)) == 0)
1706 continue;
1707
1708 plane_extra = min(fifo_extra, fifo_left);
1709 fifo_state->plane[plane_id] += plane_extra;
1710 fifo_left -= plane_extra;
1711 }
1712
1713 WARN_ON(active_planes != 0 && fifo_left != 0);
1714
1715 /* give it all to the first plane if none are active */
1716 if (active_planes == 0) {
1717 WARN_ON(fifo_left != fifo_size);
1718 fifo_state->plane[PLANE_PRIMARY] = fifo_left;
1719 }
1720
1721 return 0;
1722 }
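/*
 * Worked example of the split above, using illustrative PM2 rates: primary
 * and sprite0 active with raw rates 40 and 20, sprite1 disabled:
 *
 *   total_rate = 40 + 20 = 60
 *   primary    = 511 * 40 / 60 = 340
 *   sprite0    = 511 * 20 / 60 = 170
 *   fifo_left  = 511 - 340 - 170 = 1
 *   fifo_extra = DIV_ROUND_UP(1, 2) = 1, so the primary plane picks up the
 *   leftover cacheline and ends at 341; 341 + 170 = 511.
 *
 * The cursor always keeps its fixed 63 entries on top of this.
 */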
1723
1724 /* mark all levels starting from 'level' as invalid */
1725 static void vlv_invalidate_wms(struct intel_crtc *crtc,
1726 struct vlv_wm_state *wm_state, int level)
1727 {
1728 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1729
1730 for (; level < intel_wm_num_levels(dev_priv); level++) {
1731 enum plane_id plane_id;
1732
1733 for_each_plane_id_on_crtc(crtc, plane_id)
1734 wm_state->wm[level].plane[plane_id] = USHRT_MAX;
1735
1736 wm_state->sr[level].cursor = USHRT_MAX;
1737 wm_state->sr[level].plane = USHRT_MAX;
1738 }
1739 }
1740
1741 static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
1742 {
1743 if (wm > fifo_size)
1744 return USHRT_MAX;
1745 else
1746 return fifo_size - wm;
1747 }
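/*
 * Small example of the inversion above: with a 340 entry FIFO share and a
 * computed watermark of 100, the programmed value becomes 340 - 100 = 240.
 * A watermark of 400 would not fit in that share and is reported as
 * USHRT_MAX, the same marker vlv_invalidate_wms() uses for unusable levels.
 */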
1748
1749 /*
1750 * Starting from 'level' set all higher
1751 * levels to 'value' in the "raw" watermarks.
1752 */
1753 static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1754 int level, enum plane_id plane_id, u16 value)
1755 {
1756 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1757 int num_levels = intel_wm_num_levels(dev_priv);
1758 bool dirty = false;
1759
1760 for (; level < num_levels; level++) {
1761 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1762
1763 dirty |= raw->plane[plane_id] != value;
1764 raw->plane[plane_id] = value;
1765 }
1766
1767 return dirty;
1768 }
1769
1770 static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1771 const struct intel_plane_state *plane_state)
1772 {
1773 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1774 enum plane_id plane_id = plane->id;
1775 int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1776 int level;
1777 bool dirty = false;
1778
1779 if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1780 dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1781 goto out;
1782 }
1783
1784 for (level = 0; level < num_levels; level++) {
1785 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1786 int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
1787 int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;
1788
1789 if (wm > max_wm)
1790 break;
1791
1792 dirty |= raw->plane[plane_id] != wm;
1793 raw->plane[plane_id] = wm;
1794 }
1795
1796 /* mark all higher levels as invalid */
1797 dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1798
1799 out:
1800 if (dirty)
1801 DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
1802 plane->base.name,
1803 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
1804 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
1805 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
1806
1807 return dirty;
1808 }
1809
1810 static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1811 enum plane_id plane_id, int level)
1812 {
1813 const struct g4x_pipe_wm *raw =
1814 &crtc_state->wm.vlv.raw[level];
1815 const struct vlv_fifo_state *fifo_state =
1816 &crtc_state->wm.vlv.fifo_state;
1817
1818 return raw->plane[plane_id] <= fifo_state->plane[plane_id];
1819 }
1820
1821 static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
1822 {
1823 return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1824 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1825 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
1826 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1827 }
1828
1829 static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1830 {
1831 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1832 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1833 struct intel_atomic_state *state =
1834 to_intel_atomic_state(crtc_state->base.state);
1835 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
1836 const struct vlv_fifo_state *fifo_state =
1837 &crtc_state->wm.vlv.fifo_state;
1838 int num_active_planes = hweight32(crtc_state->active_planes &
1839 ~BIT(PLANE_CURSOR));
1840 bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base);
1841 const struct intel_plane_state *old_plane_state;
1842 const struct intel_plane_state *new_plane_state;
1843 struct intel_plane *plane;
1844 enum plane_id plane_id;
1845 int level, ret, i;
1846 unsigned int dirty = 0;
1847
1848 for_each_oldnew_intel_plane_in_state(state, plane,
1849 old_plane_state,
1850 new_plane_state, i) {
1851 if (new_plane_state->base.crtc != &crtc->base &&
1852 old_plane_state->base.crtc != &crtc->base)
1853 continue;
1854
1855 if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state))
1856 dirty |= BIT(plane->id);
1857 }
1858
1859 /*
1860 * DSPARB registers may have been reset due to the
1861 * power well being turned off. Make sure we restore
1862 * them to a consistent state even if no primary/sprite
1863 * planes are initially active.
1864 */
1865 if (needs_modeset)
1866 crtc_state->fifo_changed = true;
1867
1868 if (!dirty)
1869 return 0;
1870
1871 /* cursor changes don't warrant a FIFO recompute */
1872 if (dirty & ~BIT(PLANE_CURSOR)) {
1873 const struct intel_crtc_state *old_crtc_state =
1874 intel_atomic_get_old_crtc_state(state, crtc);
1875 const struct vlv_fifo_state *old_fifo_state =
1876 &old_crtc_state->wm.vlv.fifo_state;
1877
1878 ret = vlv_compute_fifo(crtc_state);
1879 if (ret)
1880 return ret;
1881
1882 if (needs_modeset ||
1883 memcmp(old_fifo_state, fifo_state,
1884 sizeof(*fifo_state)) != 0)
1885 crtc_state->fifo_changed = true;
1886 }
1887
1888 /* initially allow all levels */
1889 wm_state->num_levels = intel_wm_num_levels(dev_priv);
1890 /*
1891 * Note that enabling cxsr with no primary/sprite planes
1892 * enabled can wedge the pipe. Hence we only allow cxsr
1893 * with exactly one enabled primary/sprite plane.
1894 */
1895 wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;
1896
1897 for (level = 0; level < wm_state->num_levels; level++) {
1898 const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1899 const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
1900
1901 if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
1902 break;
1903
1904 for_each_plane_id_on_crtc(crtc, plane_id) {
1905 wm_state->wm[level].plane[plane_id] =
1906 vlv_invert_wm_value(raw->plane[plane_id],
1907 fifo_state->plane[plane_id]);
1908 }
1909
1910 wm_state->sr[level].plane =
1911 vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY],
1912 raw->plane[PLANE_SPRITE0],
1913 raw->plane[PLANE_SPRITE1]),
1914 sr_fifo_size);
1915
1916 wm_state->sr[level].cursor =
1917 vlv_invert_wm_value(raw->plane[PLANE_CURSOR],
1918 63);
1919 }
1920
1921 if (level == 0)
1922 return -EINVAL;
1923
1924 /* limit to only levels we can actually handle */
1925 wm_state->num_levels = level;
1926
1927 /* invalidate the higher levels */
1928 vlv_invalidate_wms(crtc, wm_state, level);
1929
1930 return 0;
1931 }
1932
1933 #define VLV_FIFO(plane, value) \
1934 (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
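/*
 * The FIFO split points are 9 bit values (0-511), so each one is spread
 * across two registers: the low 8 bits go into the DSPARB (or DSPARB3 for
 * pipe C) field selected by VLV_FIFO(), and the ninth bit into the matching
 * _HI field in DSPARB2. For example, a sprite0_start of 341 (0x155) would be
 * written as 0x55 into the low field and 1 into the _HI bit, which is what
 * the "value" and "value >> 8" pairs below implement.
 */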
1935
1936 static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
1937 struct intel_crtc_state *crtc_state)
1938 {
1939 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1940 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1941 const struct vlv_fifo_state *fifo_state =
1942 &crtc_state->wm.vlv.fifo_state;
1943 int sprite0_start, sprite1_start, fifo_size;
1944
1945 if (!crtc_state->fifo_changed)
1946 return;
1947
1948 sprite0_start = fifo_state->plane[PLANE_PRIMARY];
1949 sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start;
1950 fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start;
1951
1952 WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63);
1953 WARN_ON(fifo_size != 511);
1954
1955 trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size);
1956
1957 /*
1958 * uncore.lock serves a double purpose here. It allows us to
1959 * use the less expensive I915_{READ,WRITE}_FW() functions, and
1960 * it protects the DSPARB registers from getting clobbered by
1961 * parallel updates from multiple pipes.
1962 *
1963 * intel_pipe_update_start() has already disabled interrupts
1964 * for us, so a plain spin_lock() is sufficient here.
1965 */
1966 spin_lock(&dev_priv->uncore.lock);
1967
1968 switch (crtc->pipe) {
1969 uint32_t dsparb, dsparb2, dsparb3;
1970 case PIPE_A:
1971 dsparb = I915_READ_FW(DSPARB);
1972 dsparb2 = I915_READ_FW(DSPARB2);
1973
1974 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1975 VLV_FIFO(SPRITEB, 0xff));
1976 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1977 VLV_FIFO(SPRITEB, sprite1_start));
1978
1979 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1980 VLV_FIFO(SPRITEB_HI, 0x1));
1981 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1982 VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1983
1984 I915_WRITE_FW(DSPARB, dsparb);
1985 I915_WRITE_FW(DSPARB2, dsparb2);
1986 break;
1987 case PIPE_B:
1988 dsparb = I915_READ_FW(DSPARB);
1989 dsparb2 = I915_READ_FW(DSPARB2);
1990
1991 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
1992 VLV_FIFO(SPRITED, 0xff));
1993 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
1994 VLV_FIFO(SPRITED, sprite1_start));
1995
1996 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
1997 VLV_FIFO(SPRITED_HI, 0xff));
1998 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
1999 VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
2000
2001 I915_WRITE_FW(DSPARB, dsparb);
2002 I915_WRITE_FW(DSPARB2, dsparb2);
2003 break;
2004 case PIPE_C:
2005 dsparb3 = I915_READ_FW(DSPARB3);
2006 dsparb2 = I915_READ_FW(DSPARB2);
2007
2008 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
2009 VLV_FIFO(SPRITEF, 0xff));
2010 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
2011 VLV_FIFO(SPRITEF, sprite1_start));
2012
2013 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
2014 VLV_FIFO(SPRITEF_HI, 0xff));
2015 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
2016 VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
2017
2018 I915_WRITE_FW(DSPARB3, dsparb3);
2019 I915_WRITE_FW(DSPARB2, dsparb2);
2020 break;
2021 default:
2022 break;
2023 }
2024
2025 POSTING_READ_FW(DSPARB);
2026
2027 spin_unlock(&dev_priv->uncore.lock);
2028 }
2029
2030 #undef VLV_FIFO
2031
2032 static int vlv_compute_intermediate_wm(struct drm_device *dev,
2033 struct intel_crtc *crtc,
2034 struct intel_crtc_state *crtc_state)
2035 {
2036 struct vlv_wm_state *intermediate = &crtc_state->wm.vlv.intermediate;
2037 const struct vlv_wm_state *optimal = &crtc_state->wm.vlv.optimal;
2038 const struct vlv_wm_state *active = &crtc->wm.active.vlv;
2039 int level;
2040
2041 intermediate->num_levels = min(optimal->num_levels, active->num_levels);
2042 intermediate->cxsr = optimal->cxsr && active->cxsr &&
2043 !crtc_state->disable_cxsr;
2044
2045 for (level = 0; level < intermediate->num_levels; level++) {
2046 enum plane_id plane_id;
2047
2048 for_each_plane_id_on_crtc(crtc, plane_id) {
2049 intermediate->wm[level].plane[plane_id] =
2050 min(optimal->wm[level].plane[plane_id],
2051 active->wm[level].plane[plane_id]);
2052 }
2053
2054 intermediate->sr[level].plane = min(optimal->sr[level].plane,
2055 active->sr[level].plane);
2056 intermediate->sr[level].cursor = min(optimal->sr[level].cursor,
2057 active->sr[level].cursor);
2058 }
2059
2060 vlv_invalidate_wms(crtc, intermediate, level);
2061
2062 /*
2063 * If our intermediate WM are identical to the final WM, then we can
2064 * omit the post-vblank programming; only update if it's different.
2065 */
2066 if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
2067 crtc_state->wm.need_postvbl_update = true;
2068
2069 return 0;
2070 }
2071
2072 static void vlv_merge_wm(struct drm_i915_private *dev_priv,
2073 struct vlv_wm_values *wm)
2074 {
2075 struct intel_crtc *crtc;
2076 int num_active_crtcs = 0;
2077
2078 wm->level = dev_priv->wm.max_level;
2079 wm->cxsr = true;
2080
2081 for_each_intel_crtc(&dev_priv->drm, crtc) {
2082 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2083
2084 if (!crtc->active)
2085 continue;
2086
2087 if (!wm_state->cxsr)
2088 wm->cxsr = false;
2089
2090 num_active_crtcs++;
2091 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
2092 }
2093
2094 if (num_active_crtcs != 1)
2095 wm->cxsr = false;
2096
2097 if (num_active_crtcs > 1)
2098 wm->level = VLV_WM_LEVEL_PM2;
2099
2100 for_each_intel_crtc(&dev_priv->drm, crtc) {
2101 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2102 enum pipe pipe = crtc->pipe;
2103
2104 wm->pipe[pipe] = wm_state->wm[wm->level];
2105 if (crtc->active && wm->cxsr)
2106 wm->sr = wm_state->sr[wm->level];
2107
2108 wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2;
2109 wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2;
2110 wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2;
2111 wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2;
2112 }
2113 }
2114
2115 static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
2116 {
2117 struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
2118 struct vlv_wm_values new_wm = {};
2119
2120 vlv_merge_wm(dev_priv, &new_wm);
2121
2122 if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
2123 return;
2124
2125 if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2126 chv_set_memory_dvfs(dev_priv, false);
2127
2128 if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2129 chv_set_memory_pm5(dev_priv, false);
2130
2131 if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
2132 _intel_set_memory_cxsr(dev_priv, false);
2133
2134 vlv_write_wm_values(dev_priv, &new_wm);
2135
2136 if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
2137 _intel_set_memory_cxsr(dev_priv, true);
2138
2139 if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2140 chv_set_memory_pm5(dev_priv, true);
2141
2142 if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2143 chv_set_memory_dvfs(dev_priv, true);
2144
2145 *old_wm = new_wm;
2146 }
2147
2148 static void vlv_initial_watermarks(struct intel_atomic_state *state,
2149 struct intel_crtc_state *crtc_state)
2150 {
2151 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2152 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
2153
2154 mutex_lock(&dev_priv->wm.wm_mutex);
2155 crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
2156 vlv_program_watermarks(dev_priv);
2157 mutex_unlock(&dev_priv->wm.wm_mutex);
2158 }
2159
2160 static void vlv_optimize_watermarks(struct intel_atomic_state *state,
2161 struct intel_crtc_state *crtc_state)
2162 {
2163 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2164 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
2165
2166 if (!crtc_state->wm.need_postvbl_update)
2167 return;
2168
2169 mutex_lock(&dev_priv->wm.wm_mutex);
2170 intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
2171 vlv_program_watermarks(dev_priv);
2172 mutex_unlock(&dev_priv->wm.wm_mutex);
2173 }
2174
2175 static void i965_update_wm(struct intel_crtc *unused_crtc)
2176 {
2177 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2178 struct intel_crtc *crtc;
2179 int srwm = 1;
2180 int cursor_sr = 16;
2181 bool cxsr_enabled;
2182
2183 /* Calculate self-refresh entries for single-plane configs */
2184 crtc = single_enabled_crtc(dev_priv);
2185 if (crtc) {
2186 /* self-refresh has much higher latency */
2187 static const int sr_latency_ns = 12000;
2188 const struct drm_display_mode *adjusted_mode =
2189 &crtc->config->base.adjusted_mode;
2190 const struct drm_framebuffer *fb =
2191 crtc->base.primary->state->fb;
2192 int clock = adjusted_mode->crtc_clock;
2193 int htotal = adjusted_mode->crtc_htotal;
2194 int hdisplay = crtc->config->pipe_src_w;
2195 int cpp = fb->format->cpp[0];
2196 int entries;
2197
2198 entries = intel_wm_method2(clock, htotal,
2199 hdisplay, cpp, sr_latency_ns / 100);
2200 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
2201 srwm = I965_FIFO_SIZE - entries;
2202 if (srwm < 0)
2203 srwm = 1;
2204 srwm &= 0x1ff;
2205 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
2206 entries, srwm);
2207
2208 entries = intel_wm_method2(clock, htotal,
2209 crtc->base.cursor->state->crtc_w, 4,
2210 sr_latency_ns / 100);
2211 entries = DIV_ROUND_UP(entries,
2212 i965_cursor_wm_info.cacheline_size) +
2213 i965_cursor_wm_info.guard_size;
2214
2215 cursor_sr = i965_cursor_wm_info.fifo_size - entries;
2216 if (cursor_sr > i965_cursor_wm_info.max_wm)
2217 cursor_sr = i965_cursor_wm_info.max_wm;
2218
2219 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
2220 "cursor %d\n", srwm, cursor_sr);
2221
2222 cxsr_enabled = true;
2223 } else {
2224 cxsr_enabled = false;
2225 /* Turn off self refresh if both pipes are enabled */
2226 intel_set_memory_cxsr(dev_priv, false);
2227 }
2228
2229 DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
2230 srwm);
2231
2232 /* 965 has limitations... */
2233 I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
2234 FW_WM(8, CURSORB) |
2235 FW_WM(8, PLANEB) |
2236 FW_WM(8, PLANEA));
2237 I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
2238 FW_WM(8, PLANEC_OLD));
2239 /* update cursor SR watermark */
2240 I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
2241
2242 if (cxsr_enabled)
2243 intel_set_memory_cxsr(dev_priv, true);
2244 }
2245
2246 #undef FW_WM
2247
2248 static void i9xx_update_wm(struct intel_crtc *unused_crtc)
2249 {
2250 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2251 const struct intel_watermark_params *wm_info;
2252 uint32_t fwater_lo;
2253 uint32_t fwater_hi;
2254 int cwm, srwm = 1;
2255 int fifo_size;
2256 int planea_wm, planeb_wm;
2257 struct intel_crtc *crtc, *enabled = NULL;
2258
2259 if (IS_I945GM(dev_priv))
2260 wm_info = &i945_wm_info;
2261 else if (!IS_GEN2(dev_priv))
2262 wm_info = &i915_wm_info;
2263 else
2264 wm_info = &i830_a_wm_info;
2265
2266 fifo_size = dev_priv->display.get_fifo_size(dev_priv, 0);
2267 crtc = intel_get_crtc_for_plane(dev_priv, 0);
2268 if (intel_crtc_active(crtc)) {
2269 const struct drm_display_mode *adjusted_mode =
2270 &crtc->config->base.adjusted_mode;
2271 const struct drm_framebuffer *fb =
2272 crtc->base.primary->state->fb;
2273 int cpp;
2274
2275 if (IS_GEN2(dev_priv))
2276 cpp = 4;
2277 else
2278 cpp = fb->format->cpp[0];
2279
2280 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2281 wm_info, fifo_size, cpp,
2282 pessimal_latency_ns);
2283 enabled = crtc;
2284 } else {
2285 planea_wm = fifo_size - wm_info->guard_size;
2286 if (planea_wm > (long)wm_info->max_wm)
2287 planea_wm = wm_info->max_wm;
2288 }
2289
2290 if (IS_GEN2(dev_priv))
2291 wm_info = &i830_bc_wm_info;
2292
2293 fifo_size = dev_priv->display.get_fifo_size(dev_priv, 1);
2294 crtc = intel_get_crtc_for_plane(dev_priv, 1);
2295 if (intel_crtc_active(crtc)) {
2296 const struct drm_display_mode *adjusted_mode =
2297 &crtc->config->base.adjusted_mode;
2298 const struct drm_framebuffer *fb =
2299 crtc->base.primary->state->fb;
2300 int cpp;
2301
2302 if (IS_GEN2(dev_priv))
2303 cpp = 4;
2304 else
2305 cpp = fb->format->cpp[0];
2306
2307 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2308 wm_info, fifo_size, cpp,
2309 pessimal_latency_ns);
2310 if (enabled == NULL)
2311 enabled = crtc;
2312 else
2313 enabled = NULL;
2314 } else {
2315 planeb_wm = fifo_size - wm_info->guard_size;
2316 if (planeb_wm > (long)wm_info->max_wm)
2317 planeb_wm = wm_info->max_wm;
2318 }
2319
2320 DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
2321
2322 if (IS_I915GM(dev_priv) && enabled) {
2323 struct drm_i915_gem_object *obj;
2324
2325 obj = intel_fb_obj(enabled->base.primary->state->fb);
2326
2327 /* self-refresh seems busted with untiled */
2328 if (!i915_gem_object_is_tiled(obj))
2329 enabled = NULL;
2330 }
2331
2332 /*
2333 * Overlay gets an aggressive default since video jitter is bad.
2334 */
2335 cwm = 2;
2336
2337 /* Play safe and disable self-refresh before adjusting watermarks. */
2338 intel_set_memory_cxsr(dev_priv, false);
2339
2340 /* Calculate self-refresh entries for single-plane configs */
2341 if (HAS_FW_BLC(dev_priv) && enabled) {
2342 /* self-refresh has much higher latency */
2343 static const int sr_latency_ns = 6000;
2344 const struct drm_display_mode *adjusted_mode =
2345 &enabled->config->base.adjusted_mode;
2346 const struct drm_framebuffer *fb =
2347 enabled->base.primary->state->fb;
2348 int clock = adjusted_mode->crtc_clock;
2349 int htotal = adjusted_mode->crtc_htotal;
2350 int hdisplay = enabled->config->pipe_src_w;
2351 int cpp;
2352 int entries;
2353
2354 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
2355 cpp = 4;
2356 else
2357 cpp = fb->format->cpp[0];
2358
2359 entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
2360 sr_latency_ns / 100);
2361 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
2362 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
2363 srwm = wm_info->fifo_size - entries;
2364 if (srwm < 0)
2365 srwm = 1;
2366
2367 if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
2368 I915_WRITE(FW_BLC_SELF,
2369 FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
2370 else
2371 I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
2372 }
2373
2374 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
2375 planea_wm, planeb_wm, cwm, srwm);
2376
2377 fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
2378 fwater_hi = (cwm & 0x1f);
2379
2380 /* Set request length to 8 cachelines per fetch */
2381 fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
2382 fwater_hi = fwater_hi | (1 << 8);
2383
2384 I915_WRITE(FW_BLC, fwater_lo);
2385 I915_WRITE(FW_BLC2, fwater_hi);
2386
2387 if (enabled)
2388 intel_set_memory_cxsr(dev_priv, true);
2389 }
2390
2391 static void i845_update_wm(struct intel_crtc *unused_crtc)
2392 {
2393 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2394 struct intel_crtc *crtc;
2395 const struct drm_display_mode *adjusted_mode;
2396 uint32_t fwater_lo;
2397 int planea_wm;
2398
2399 crtc = single_enabled_crtc(dev_priv);
2400 if (crtc == NULL)
2401 return;
2402
2403 adjusted_mode = &crtc->config->base.adjusted_mode;
2404 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2405 &i845_wm_info,
2406 dev_priv->display.get_fifo_size(dev_priv, 0),
2407 4, pessimal_latency_ns);
2408 fwater_lo = I915_READ(FW_BLC) & ~0xfff;
2409 fwater_lo |= (3<<8) | planea_wm;
2410
2411 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
2412
2413 I915_WRITE(FW_BLC, fwater_lo);
2414 }
2415
2416 /* latency must be in 0.1us units. */
2417 static unsigned int ilk_wm_method1(unsigned int pixel_rate,
2418 unsigned int cpp,
2419 unsigned int latency)
2420 {
2421 unsigned int ret;
2422
2423 ret = intel_wm_method1(pixel_rate, cpp, latency);
2424 ret = DIV_ROUND_UP(ret, 64) + 2;
2425
2426 return ret;
2427 }
2428
2429 /* latency must be in 0.1us units. */
2430 static unsigned int ilk_wm_method2(unsigned int pixel_rate,
2431 unsigned int htotal,
2432 unsigned int width,
2433 unsigned int cpp,
2434 unsigned int latency)
2435 {
2436 unsigned int ret;
2437
2438 ret = intel_wm_method2(pixel_rate, htotal,
2439 width, cpp, latency);
2440 ret = DIV_ROUND_UP(ret, 64) + 2;
2441
2442 return ret;
2443 }
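/*
 * Both wrappers above convert the raw result into FIFO lines: assuming the
 * intel_wm_method1()/intel_wm_method2() helpers return a byte count, the
 * division by 64 converts bytes to FIFO cachelines and the +2 adds a small
 * guard. For example, a raw result of 1000 bytes becomes
 * DIV_ROUND_UP(1000, 64) + 2 = 18 lines.
 */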
2444
2445 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
2446 uint8_t cpp)
2447 {
2448 /*
2449 * Neither of these should be possible since this function shouldn't be
2450 * called if the CRTC is off or the plane is invisible. But let's be
2451 * extra paranoid to avoid a potential divide-by-zero if we screw up
2452 * elsewhere in the driver.
2453 */
2454 if (WARN_ON(!cpp))
2455 return 0;
2456 if (WARN_ON(!horiz_pixels))
2457 return 0;
2458
2459 return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
2460 }
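/*
 * Worked example of the FBC watermark above, with illustrative numbers:
 * pri_val = 100, horiz_pixels = 1920, cpp = 4 gives
 * DIV_ROUND_UP(100 * 64, 1920 * 4) + 2 = DIV_ROUND_UP(6400, 7680) + 2 = 3.
 */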
2461
2462 struct ilk_wm_maximums {
2463 uint16_t pri;
2464 uint16_t spr;
2465 uint16_t cur;
2466 uint16_t fbc;
2467 };
2468
2469 /*
2470 * For both WM_PIPE and WM_LP.
2471 * mem_value must be in 0.1us units.
2472 */
2473 static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
2474 const struct intel_plane_state *pstate,
2475 uint32_t mem_value,
2476 bool is_lp)
2477 {
2478 uint32_t method1, method2;
2479 int cpp;
2480
2481 if (mem_value == 0)
2482 return U32_MAX;
2483
2484 if (!intel_wm_plane_visible(cstate, pstate))
2485 return 0;
2486
2487 cpp = pstate->base.fb->format->cpp[0];
2488
2489 method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2490
2491 if (!is_lp)
2492 return method1;
2493
2494 method2 = ilk_wm_method2(cstate->pixel_rate,
2495 cstate->base.adjusted_mode.crtc_htotal,
2496 drm_rect_width(&pstate->base.dst),
2497 cpp, mem_value);
2498
2499 return min(method1, method2);
2500 }
2501
2502 /*
2503 * For both WM_PIPE and WM_LP.
2504 * mem_value must be in 0.1us units.
2505 */
2506 static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
2507 const struct intel_plane_state *pstate,
2508 uint32_t mem_value)
2509 {
2510 uint32_t method1, method2;
2511 int cpp;
2512
2513 if (mem_value == 0)
2514 return U32_MAX;
2515
2516 if (!intel_wm_plane_visible(cstate, pstate))
2517 return 0;
2518
2519 cpp = pstate->base.fb->format->cpp[0];
2520
2521 method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2522 method2 = ilk_wm_method2(cstate->pixel_rate,
2523 cstate->base.adjusted_mode.crtc_htotal,
2524 drm_rect_width(&pstate->base.dst),
2525 cpp, mem_value);
2526 return min(method1, method2);
2527 }
2528
2529 /*
2530 * For both WM_PIPE and WM_LP.
2531 * mem_value must be in 0.1us units.
2532 */
2533 static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
2534 const struct intel_plane_state *pstate,
2535 uint32_t mem_value)
2536 {
2537 int cpp;
2538
2539 if (mem_value == 0)
2540 return U32_MAX;
2541
2542 if (!intel_wm_plane_visible(cstate, pstate))
2543 return 0;
2544
2545 cpp = pstate->base.fb->format->cpp[0];
2546
2547 return ilk_wm_method2(cstate->pixel_rate,
2548 cstate->base.adjusted_mode.crtc_htotal,
2549 pstate->base.crtc_w, cpp, mem_value);
2550 }
2551
2552 /* Only for WM_LP. */
2553 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
2554 const struct intel_plane_state *pstate,
2555 uint32_t pri_val)
2556 {
2557 int cpp;
2558
2559 if (!intel_wm_plane_visible(cstate, pstate))
2560 return 0;
2561
2562 cpp = pstate->base.fb->format->cpp[0];
2563
2564 return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp);
2565 }
2566
2567 static unsigned int
2568 ilk_display_fifo_size(const struct drm_i915_private *dev_priv)
2569 {
2570 if (INTEL_GEN(dev_priv) >= 8)
2571 return 3072;
2572 else if (INTEL_GEN(dev_priv) >= 7)
2573 return 768;
2574 else
2575 return 512;
2576 }
2577
2578 static unsigned int
2579 ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv,
2580 int level, bool is_sprite)
2581 {
2582 if (INTEL_GEN(dev_priv) >= 8)
2583 /* BDW primary/sprite plane watermarks */
2584 return level == 0 ? 255 : 2047;
2585 else if (INTEL_GEN(dev_priv) >= 7)
2586 /* IVB/HSW primary/sprite plane watermarks */
2587 return level == 0 ? 127 : 1023;
2588 else if (!is_sprite)
2589 /* ILK/SNB primary plane watermarks */
2590 return level == 0 ? 127 : 511;
2591 else
2592 /* ILK/SNB sprite plane watermarks */
2593 return level == 0 ? 63 : 255;
2594 }
2595
2596 static unsigned int
2597 ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level)
2598 {
2599 if (INTEL_GEN(dev_priv) >= 7)
2600 return level == 0 ? 63 : 255;
2601 else
2602 return level == 0 ? 31 : 63;
2603 }
2604
2605 static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv)
2606 {
2607 if (INTEL_GEN(dev_priv) >= 8)
2608 return 31;
2609 else
2610 return 15;
2611 }
2612
2613 /* Calculate the maximum primary/sprite plane watermark */
2614 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
2615 int level,
2616 const struct intel_wm_config *config,
2617 enum intel_ddb_partitioning ddb_partitioning,
2618 bool is_sprite)
2619 {
2620 struct drm_i915_private *dev_priv = to_i915(dev);
2621 unsigned int fifo_size = ilk_display_fifo_size(dev_priv);
2622
2623 /* if sprites aren't enabled, sprites get nothing */
2624 if (is_sprite && !config->sprites_enabled)
2625 return 0;
2626
2627 /* HSW allows LP1+ watermarks even with multiple pipes */
2628 if (level == 0 || config->num_pipes_active > 1) {
2629 fifo_size /= INTEL_INFO(dev_priv)->num_pipes;
2630
2631 /*
2632 * For some reason the non self refresh
2633 * FIFO size is only half of the self
2634 * refresh FIFO size on ILK/SNB.
2635 */
2636 if (INTEL_GEN(dev_priv) <= 6)
2637 fifo_size /= 2;
2638 }
2639
2640 if (config->sprites_enabled) {
2641 /* level 0 is always calculated with 1:1 split */
2642 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
2643 if (is_sprite)
2644 fifo_size *= 5;
2645 fifo_size /= 6;
2646 } else {
2647 fifo_size /= 2;
2648 }
2649 }
2650
2651 /* clamp to max that the registers can hold */
2652 return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite));
2653 }
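/*
 * Worked example of the partitioning above (illustrative, assuming an IVB
 * part with 3 pipes): fifo_size starts at 768; at level 0 (or whenever more
 * than one pipe is active) it is divided by the 3 pipes to 256, gen7 skips
 * the extra ILK/SNB halving, and with sprites enabled the level 0 1:1 split
 * leaves 128. That is finally clamped to the level 0 register maximum of 127.
 */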
2654
2655 /* Calculate the maximum cursor plane watermark */
2656 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
2657 int level,
2658 const struct intel_wm_config *config)
2659 {
2660 /* HSW LP1+ watermarks w/ multiple pipes */
2661 if (level > 0 && config->num_pipes_active > 1)
2662 return 64;
2663
2664 /* otherwise just report max that registers can hold */
2665 return ilk_cursor_wm_reg_max(to_i915(dev), level);
2666 }
2667
2668 static void ilk_compute_wm_maximums(const struct drm_device *dev,
2669 int level,
2670 const struct intel_wm_config *config,
2671 enum intel_ddb_partitioning ddb_partitioning,
2672 struct ilk_wm_maximums *max)
2673 {
2674 max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
2675 max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
2676 max->cur = ilk_cursor_wm_max(dev, level, config);
2677 max->fbc = ilk_fbc_wm_reg_max(to_i915(dev));
2678 }
2679
2680 static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv,
2681 int level,
2682 struct ilk_wm_maximums *max)
2683 {
2684 max->pri = ilk_plane_wm_reg_max(dev_priv, level, false);
2685 max->spr = ilk_plane_wm_reg_max(dev_priv, level, true);
2686 max->cur = ilk_cursor_wm_reg_max(dev_priv, level);
2687 max->fbc = ilk_fbc_wm_reg_max(dev_priv);
2688 }
2689
2690 static bool ilk_validate_wm_level(int level,
2691 const struct ilk_wm_maximums *max,
2692 struct intel_wm_level *result)
2693 {
2694 bool ret;
2695
2696 /* already determined to be invalid? */
2697 if (!result->enable)
2698 return false;
2699
2700 result->enable = result->pri_val <= max->pri &&
2701 result->spr_val <= max->spr &&
2702 result->cur_val <= max->cur;
2703
2704 ret = result->enable;
2705
2706 /*
2707 * HACK until we can pre-compute everything,
2708 * and thus fail gracefully if LP0 watermarks
2709 * are exceeded...
2710 */
2711 if (level == 0 && !result->enable) {
2712 if (result->pri_val > max->pri)
2713 DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2714 level, result->pri_val, max->pri);
2715 if (result->spr_val > max->spr)
2716 DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2717 level, result->spr_val, max->spr);
2718 if (result->cur_val > max->cur)
2719 DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2720 level, result->cur_val, max->cur);
2721
2722 result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
2723 result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
2724 result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
2725 result->enable = true;
2726 }
2727
2728 return ret;
2729 }
2730
2731 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
2732 const struct intel_crtc *intel_crtc,
2733 int level,
2734 struct intel_crtc_state *cstate,
2735 const struct intel_plane_state *pristate,
2736 const struct intel_plane_state *sprstate,
2737 const struct intel_plane_state *curstate,
2738 struct intel_wm_level *result)
2739 {
2740 uint16_t pri_latency = dev_priv->wm.pri_latency[level];
2741 uint16_t spr_latency = dev_priv->wm.spr_latency[level];
2742 uint16_t cur_latency = dev_priv->wm.cur_latency[level];
2743
2744 /* WM1+ latency values stored in 0.5us units */
2745 if (level > 0) {
2746 pri_latency *= 5;
2747 spr_latency *= 5;
2748 cur_latency *= 5;
2749 }
2750
2751 if (pristate) {
2752 result->pri_val = ilk_compute_pri_wm(cstate, pristate,
2753 pri_latency, level);
2754 result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
2755 }
2756
2757 if (sprstate)
2758 result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);
2759
2760 if (curstate)
2761 result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);
2762
2763 result->enable = true;
2764 }
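/*
 * Example of the unit handling above: WM1+ latencies are stored in 0.5us
 * units, so a stored value of 4 becomes 4 * 5 = 20 in the 0.1us units that
 * the ilk_compute_*_wm() helpers expect, i.e. 2.0us. The WM0 latency is
 * already in 0.1us units and is passed through unchanged.
 */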
2765
2766 static uint32_t
2767 hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
2768 {
2769 const struct intel_atomic_state *intel_state =
2770 to_intel_atomic_state(cstate->base.state);
2771 const struct drm_display_mode *adjusted_mode =
2772 &cstate->base.adjusted_mode;
2773 u32 linetime, ips_linetime;
2774
2775 if (!cstate->base.active)
2776 return 0;
2777 if (WARN_ON(adjusted_mode->crtc_clock == 0))
2778 return 0;
2779 if (WARN_ON(intel_state->cdclk.logical.cdclk == 0))
2780 return 0;
2781
2782 /* The watermarks are computed based on how long it takes to fill a
2783 * single row at the given clock rate, multiplied by 8.
2784 */
2785 linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2786 adjusted_mode->crtc_clock);
2787 ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2788 intel_state->cdclk.logical.cdclk);
2789
2790 return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2791 PIPE_WM_LINETIME_TIME(linetime);
2792 }
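/*
 * Worked example of the linetime value, with an illustrative 148.5 MHz /
 * htotal 2200 mode: DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) = 119, i.e.
 * roughly 14.8us per line expressed in 1/8 us units. ips_linetime is the
 * same calculation against the logical CDCLK instead of the pixel clock.
 */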
2793
2794 static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
2795 uint16_t wm[8])
2796 {
2797 if (INTEL_GEN(dev_priv) >= 9) {
2798 uint32_t val;
2799 int ret, i;
2800 int level, max_level = ilk_wm_max_level(dev_priv);
2801
2802 /* read the first set of memory latencies[0:3] */
2803 val = 0; /* data0 to be programmed to 0 for first set */
2804 mutex_lock(&dev_priv->pcu_lock);
2805 ret = sandybridge_pcode_read(dev_priv,
2806 GEN9_PCODE_READ_MEM_LATENCY,
2807 &val);
2808 mutex_unlock(&dev_priv->pcu_lock);
2809
2810 if (ret) {
2811 DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2812 return;
2813 }
2814
2815 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2816 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2817 GEN9_MEM_LATENCY_LEVEL_MASK;
2818 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2819 GEN9_MEM_LATENCY_LEVEL_MASK;
2820 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2821 GEN9_MEM_LATENCY_LEVEL_MASK;
2822
2823 /* read the second set of memory latencies[4:7] */
2824 val = 1; /* data0 to be programmed to 1 for second set */
2825 mutex_lock(&dev_priv->pcu_lock);
2826 ret = sandybridge_pcode_read(dev_priv,
2827 GEN9_PCODE_READ_MEM_LATENCY,
2828 &val);
2829 mutex_unlock(&dev_priv->pcu_lock);
2830 if (ret) {
2831 DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2832 return;
2833 }
2834
2835 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2836 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2837 GEN9_MEM_LATENCY_LEVEL_MASK;
2838 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2839 GEN9_MEM_LATENCY_LEVEL_MASK;
2840 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2841 GEN9_MEM_LATENCY_LEVEL_MASK;
2842
2843 /*
2844 * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
2845 * need to be disabled. We make sure to sanitize the values out
2846 * of the punit to satisfy this requirement.
2847 */
2848 for (level = 1; level <= max_level; level++) {
2849 if (wm[level] == 0) {
2850 for (i = level + 1; i <= max_level; i++)
2851 wm[i] = 0;
2852 break;
2853 }
2854 }
2855
2856 /*
2857 * WaWmMemoryReadLatency:skl+,glk
2858 *
2859 * punit doesn't take into account the read latency so we need
2860 * to add 2us to the various latency levels we retrieve from the
2861 * punit when level 0 response data is 0us.
2862 */
2863 if (wm[0] == 0) {
2864 wm[0] += 2;
2865 for (level = 1; level <= max_level; level++) {
2866 if (wm[level] == 0)
2867 break;
2868 wm[level] += 2;
2869 }
2870 }
2871
2872 } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
2873 uint64_t sskpd = I915_READ64(MCH_SSKPD);
2874
2875 wm[0] = (sskpd >> 56) & 0xFF;
2876 if (wm[0] == 0)
2877 wm[0] = sskpd & 0xF;
2878 wm[1] = (sskpd >> 4) & 0xFF;
2879 wm[2] = (sskpd >> 12) & 0xFF;
2880 wm[3] = (sskpd >> 20) & 0x1FF;
2881 wm[4] = (sskpd >> 32) & 0x1FF;
2882 } else if (INTEL_GEN(dev_priv) >= 6) {
2883 uint32_t sskpd = I915_READ(MCH_SSKPD);
2884
2885 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2886 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2887 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2888 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2889 } else if (INTEL_GEN(dev_priv) >= 5) {
2890 uint32_t mltr = I915_READ(MLTR_ILK);
2891
2892 /* ILK primary LP0 latency is 700 ns */
2893 wm[0] = 7;
2894 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2895 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2896 } else {
2897 MISSING_CASE(INTEL_DEVID(dev_priv));
2898 }
2899 }
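/*
 * Sketch of the gen9 mailbox decoding above, assuming the level shifts are
 * 8/16/24 bits with an 0xff mask: a returned val of 0x0a070503 would unpack
 * to wm[0..3] (or wm[4..7] for the second read) of 3, 5, 7 and 10
 * microseconds respectively.
 */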
2900
2901 static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv,
2902 uint16_t wm[5])
2903 {
2904 /* ILK sprite LP0 latency is 1300 ns */
2905 if (IS_GEN5(dev_priv))
2906 wm[0] = 13;
2907 }
2908
2909 static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv,
2910 uint16_t wm[5])
2911 {
2912 /* ILK cursor LP0 latency is 1300 ns */
2913 if (IS_GEN5(dev_priv))
2914 wm[0] = 13;
2915
2916 /* WaDoubleCursorLP3Latency:ivb */
2917 if (IS_IVYBRIDGE(dev_priv))
2918 wm[3] *= 2;
2919 }
2920
2921 int ilk_wm_max_level(const struct drm_i915_private *dev_priv)
2922 {
2923 /* how many WM levels are we expecting */
2924 if (INTEL_GEN(dev_priv) >= 9)
2925 return 7;
2926 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
2927 return 4;
2928 else if (INTEL_GEN(dev_priv) >= 6)
2929 return 3;
2930 else
2931 return 2;
2932 }
2933
2934 static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
2935 const char *name,
2936 const uint16_t wm[8])
2937 {
2938 int level, max_level = ilk_wm_max_level(dev_priv);
2939
2940 for (level = 0; level <= max_level; level++) {
2941 unsigned int latency = wm[level];
2942
2943 if (latency == 0) {
2944 DRM_DEBUG_KMS("%s WM%d latency not provided\n",
2945 name, level);
2946 continue;
2947 }
2948
2949 /*
2950 * - latencies are in us on gen9.
2951 * - before then, WM1+ latency values are in 0.5us units
2952 */
2953 if (INTEL_GEN(dev_priv) >= 9)
2954 latency *= 10;
2955 else if (level > 0)
2956 latency *= 5;
2957
2958 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2959 name, level, wm[level],
2960 latency / 10, latency % 10);
2961 }
2962 }
2963
2964 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2965 uint16_t wm[5], uint16_t min)
2966 {
2967 int level, max_level = ilk_wm_max_level(dev_priv);
2968
2969 if (wm[0] >= min)
2970 return false;
2971
2972 wm[0] = max(wm[0], min);
2973 for (level = 1; level <= max_level; level++)
2974 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5));
2975
2976 return true;
2977 }
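/*
 * Example of the bump above with min = 12 (1.2us): a WM0 latency of 7 is
 * raised to 12, and every WM1+ latency is raised to at least
 * DIV_ROUND_UP(12, 5) = 3, which is 1.5us given the 0.5us units used for
 * those levels.
 */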
2978
2979 static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
2980 {
2981 bool changed;
2982
2983 /*
2984 * The BIOS provided WM memory latency values are often
2985 * inadequate for high resolution displays. Adjust them.
2986 */
2987 changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
2988 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
2989 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
2990
2991 if (!changed)
2992 return;
2993
2994 DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
2995 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
2996 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
2997 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
2998 }
2999
3000 static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv)
3001 {
3002 /*
3003 * On some SNB machines (Thinkpad X220 Tablet at least)
3004 * LP3 usage can cause vblank interrupts to be lost.
3005 * The DEIIR bit will go high but it looks like the CPU
3006 * never gets interrupted.
3007 *
3008 * It's not clear whether other interrupt sources could
3009 * be affected or if this is somehow limited to vblank
3010 * interrupts only. To play it safe we disable LP3
3011 * watermarks entirely.
3012 */
3013 if (dev_priv->wm.pri_latency[3] == 0 &&
3014 dev_priv->wm.spr_latency[3] == 0 &&
3015 dev_priv->wm.cur_latency[3] == 0)
3016 return;
3017
3018 dev_priv->wm.pri_latency[3] = 0;
3019 dev_priv->wm.spr_latency[3] = 0;
3020 dev_priv->wm.cur_latency[3] = 0;
3021
3022 DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n");
3023 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3024 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3025 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3026 }
3027
3028 static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
3029 {
3030 intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
3031
3032 memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
3033 sizeof(dev_priv->wm.pri_latency));
3034 memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
3035 sizeof(dev_priv->wm.pri_latency));
3036
3037 intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency);
3038 intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency);
3039
3040 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3041 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3042 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3043
3044 if (IS_GEN6(dev_priv)) {
3045 snb_wm_latency_quirk(dev_priv);
3046 snb_wm_lp3_irq_quirk(dev_priv);
3047 }
3048 }
3049
3050 static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
3051 {
3052 intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency);
3053 intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency);
3054 }
3055
3056 static bool ilk_validate_pipe_wm(struct drm_device *dev,
3057 struct intel_pipe_wm *pipe_wm)
3058 {
3059 /* LP0 watermark maximums depend on this pipe alone */
3060 const struct intel_wm_config config = {
3061 .num_pipes_active = 1,
3062 .sprites_enabled = pipe_wm->sprites_enabled,
3063 .sprites_scaled = pipe_wm->sprites_scaled,
3064 };
3065 struct ilk_wm_maximums max;
3066
3067 /* LP0 watermarks always use 1/2 DDB partitioning */
3068 ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
3069
3070 /* At least LP0 must be valid */
3071 if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
3072 DRM_DEBUG_KMS("LP0 watermark invalid\n");
3073 return false;
3074 }
3075
3076 return true;
3077 }
3078
3079 /* Compute new watermarks for the pipe */
3080 static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate)
3081 {
3082 struct drm_atomic_state *state = cstate->base.state;
3083 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
3084 struct intel_pipe_wm *pipe_wm;
3085 struct drm_device *dev = state->dev;
3086 const struct drm_i915_private *dev_priv = to_i915(dev);
3087 struct drm_plane *plane;
3088 const struct drm_plane_state *plane_state;
3089 const struct intel_plane_state *pristate = NULL;
3090 const struct intel_plane_state *sprstate = NULL;
3091 const struct intel_plane_state *curstate = NULL;
3092 int level, max_level = ilk_wm_max_level(dev_priv), usable_level;
3093 struct ilk_wm_maximums max;
3094
3095 pipe_wm = &cstate->wm.ilk.optimal;
3096
3097 drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &cstate->base) {
3098 const struct intel_plane_state *ps = to_intel_plane_state(plane_state);
3099
3100 if (plane->type == DRM_PLANE_TYPE_PRIMARY)
3101 pristate = ps;
3102 else if (plane->type == DRM_PLANE_TYPE_OVERLAY)
3103 sprstate = ps;
3104 else if (plane->type == DRM_PLANE_TYPE_CURSOR)
3105 curstate = ps;
3106 }
3107
3108 pipe_wm->pipe_enabled = cstate->base.active;
3109 if (sprstate) {
3110 pipe_wm->sprites_enabled = sprstate->base.visible;
3111 pipe_wm->sprites_scaled = sprstate->base.visible &&
3112 (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 ||
3113 drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16);
3114 }
3115
3116 usable_level = max_level;
3117
3118 /* ILK/SNB: LP2+ watermarks only w/o sprites */
3119 if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled)
3120 usable_level = 1;
3121
3122 /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
3123 if (pipe_wm->sprites_scaled)
3124 usable_level = 0;
3125
3126 memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
3127 ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
3128 pristate, sprstate, curstate, &pipe_wm->wm[0]);
3129
3130 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3131 pipe_wm->linetime = hsw_compute_linetime_wm(cstate);
3132
3133 if (!ilk_validate_pipe_wm(dev, pipe_wm))
3134 return -EINVAL;
3135
3136 ilk_compute_wm_reg_maximums(dev_priv, 1, &max);
3137
3138 for (level = 1; level <= usable_level; level++) {
3139 struct intel_wm_level *wm = &pipe_wm->wm[level];
3140
3141 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
3142 pristate, sprstate, curstate, wm);
3143
3144 /*
3145 * Disable any watermark level that exceeds the
3146 * register maximums since such watermarks are
3147 * always invalid.
3148 */
3149 if (!ilk_validate_wm_level(level, &max, wm)) {
3150 memset(wm, 0, sizeof(*wm));
3151 break;
3152 }
3153 }
3154
3155 return 0;
3156 }
3157
3158 /*
3159 * Build a set of 'intermediate' watermark values that satisfy both the old
3160 * state and the new state. These can be programmed to the hardware
3161 * immediately.
3162 */
3163 static int ilk_compute_intermediate_wm(struct drm_device *dev,
3164 struct intel_crtc *intel_crtc,
3165 struct intel_crtc_state *newstate)
3166 {
3167 struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
3168 struct intel_atomic_state *intel_state =
3169 to_intel_atomic_state(newstate->base.state);
3170 const struct intel_crtc_state *oldstate =
3171 intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
3172 const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
3173 int level, max_level = ilk_wm_max_level(to_i915(dev));
3174
3175 /*
3176 * Start with the final, target watermarks, then combine with the
3177 * currently active watermarks to get values that are safe both before
3178 * and after the vblank.
3179 */
3180 *a = newstate->wm.ilk.optimal;
3181 if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base))
3182 return 0;
3183
3184 a->pipe_enabled |= b->pipe_enabled;
3185 a->sprites_enabled |= b->sprites_enabled;
3186 a->sprites_scaled |= b->sprites_scaled;
3187
3188 for (level = 0; level <= max_level; level++) {
3189 struct intel_wm_level *a_wm = &a->wm[level];
3190 const struct intel_wm_level *b_wm = &b->wm[level];
3191
3192 a_wm->enable &= b_wm->enable;
3193 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
3194 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
3195 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
3196 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
3197 }
3198
3199 /*
3200 * We need to make sure that these merged watermark values are
3201 * actually a valid configuration themselves. If they're not,
3202 * there's no safe way to transition from the old state to
3203 * the new state, so we need to fail the atomic transaction.
3204 */
3205 if (!ilk_validate_pipe_wm(dev, a))
3206 return -EINVAL;
3207
3208 /*
3209 * If our intermediate WM are identical to the final WM, then we can
3210 * omit the post-vblank programming; only update if it's different.
3211 */
3212 if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0)
3213 newstate->wm.need_postvbl_update = true;
3214
3215 return 0;
3216 }
3217
3218 /*
3219 * Merge the watermarks from all active pipes for a specific level.
3220 */
3221 static void ilk_merge_wm_level(struct drm_device *dev,
3222 int level,
3223 struct intel_wm_level *ret_wm)
3224 {
3225 const struct intel_crtc *intel_crtc;
3226
3227 ret_wm->enable = true;
3228
3229 for_each_intel_crtc(dev, intel_crtc) {
3230 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk;
3231 const struct intel_wm_level *wm = &active->wm[level];
3232
3233 if (!active->pipe_enabled)
3234 continue;
3235
3236 /*
3237 * The watermark values may have been used in the past,
3238 * so we must maintain them in the registers for some
3239 * time even if the level is now disabled.
3240 */
3241 if (!wm->enable)
3242 ret_wm->enable = false;
3243
3244 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
3245 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
3246 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
3247 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
3248 }
3249 }
3250
3251 /*
3252 * Merge all low power watermarks for all active pipes.
3253 */
3254 static void ilk_wm_merge(struct drm_device *dev,
3255 const struct intel_wm_config *config,
3256 const struct ilk_wm_maximums *max,
3257 struct intel_pipe_wm *merged)
3258 {
3259 struct drm_i915_private *dev_priv = to_i915(dev);
3260 int level, max_level = ilk_wm_max_level(dev_priv);
3261 int last_enabled_level = max_level;
3262
3263 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
3264 if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
3265 config->num_pipes_active > 1)
3266 last_enabled_level = 0;
3267
3268 /* ILK: FBC WM must be disabled always */
3269 merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6;
3270
3271 /* merge each WM1+ level */
3272 for (level = 1; level <= max_level; level++) {
3273 struct intel_wm_level *wm = &merged->wm[level];
3274
3275 ilk_merge_wm_level(dev, level, wm);
3276
3277 if (level > last_enabled_level)
3278 wm->enable = false;
3279 else if (!ilk_validate_wm_level(level, max, wm))
3280 /* make sure all following levels get disabled */
3281 last_enabled_level = level - 1;
3282
3283 /*
3284 * The spec says it is preferred to disable
3285 * FBC WMs instead of disabling a WM level.
3286 */
3287 if (wm->fbc_val > max->fbc) {
3288 if (wm->enable)
3289 merged->fbc_wm_enabled = false;
3290 wm->fbc_val = 0;
3291 }
3292 }
3293
3294 /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
3295 /*
3296 * FIXME this is racy. FBC might get enabled later.
3297 * What we should check here is whether FBC can be
3298 * enabled sometime later.
3299 */
3300 if (IS_GEN5(dev_priv) && !merged->fbc_wm_enabled &&
3301 intel_fbc_is_active(dev_priv)) {
3302 for (level = 2; level <= max_level; level++) {
3303 struct intel_wm_level *wm = &merged->wm[level];
3304
3305 wm->enable = false;
3306 }
3307 }
3308 }
3309
3310 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
3311 {
3312 /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
3313 return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
3314 }
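/*
 * Example of the mapping above: with wm[4] enabled (e.g. HSW/BDW, where five
 * levels exist) LP1/LP2/LP3 read levels 1/3/4; otherwise they read the
 * contiguous levels 1/2/3.
 */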
3315
3316 /* The value we need to program into the WM_LPx latency field */
3317 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
3318 {
3319 struct drm_i915_private *dev_priv = to_i915(dev);
3320
3321 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3322 return 2 * level;
3323 else
3324 return dev_priv->wm.pri_latency[level];
3325 }
3326
3327 static void ilk_compute_wm_results(struct drm_device *dev,
3328 const struct intel_pipe_wm *merged,
3329 enum intel_ddb_partitioning partitioning,
3330 struct ilk_wm_values *results)
3331 {
3332 struct drm_i915_private *dev_priv = to_i915(dev);
3333 struct intel_crtc *intel_crtc;
3334 int level, wm_lp;
3335
3336 results->enable_fbc_wm = merged->fbc_wm_enabled;
3337 results->partitioning = partitioning;
3338
3339 /* LP1+ register values */
3340 for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3341 const struct intel_wm_level *r;
3342
3343 level = ilk_wm_lp_to_level(wm_lp, merged);
3344
3345 r = &merged->wm[level];
3346
3347 /*
3348 * Maintain the watermark values even if the level is
3349 * disabled. Doing otherwise could cause underruns.
3350 */
3351 results->wm_lp[wm_lp - 1] =
3352 (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
3353 (r->pri_val << WM1_LP_SR_SHIFT) |
3354 r->cur_val;
3355
3356 if (r->enable)
3357 results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
3358
3359 if (INTEL_GEN(dev_priv) >= 8)
3360 results->wm_lp[wm_lp - 1] |=
3361 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
3362 else
3363 results->wm_lp[wm_lp - 1] |=
3364 r->fbc_val << WM1_LP_FBC_SHIFT;
3365
3366 /*
3367 * Always set WM1S_LP_EN when spr_val != 0, even if the
3368 * level is disabled. Doing otherwise could cause underruns.
3369 */
3370 if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) {
3371 WARN_ON(wm_lp != 1);
3372 results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
3373 } else
3374 results->wm_lp_spr[wm_lp - 1] = r->spr_val;
3375 }
3376
3377 /* LP0 register values */
3378 for_each_intel_crtc(dev, intel_crtc) {
3379 enum pipe pipe = intel_crtc->pipe;
3380 const struct intel_wm_level *r =
3381 &intel_crtc->wm.active.ilk.wm[0];
3382
3383 if (WARN_ON(!r->enable))
3384 continue;
3385
3386 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime;
3387
3388 results->wm_pipe[pipe] =
3389 (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
3390 (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
3391 r->cur_val;
3392 }
3393 }
3394
3395 /* Find the result with the highest level enabled. If both are at the same
3396 * level, prefer the one with FBC watermarks enabled; if that still ties, prefer r1. */
3397 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
3398 struct intel_pipe_wm *r1,
3399 struct intel_pipe_wm *r2)
3400 {
3401 int level, max_level = ilk_wm_max_level(to_i915(dev));
3402 int level1 = 0, level2 = 0;
3403
3404 for (level = 1; level <= max_level; level++) {
3405 if (r1->wm[level].enable)
3406 level1 = level;
3407 if (r2->wm[level].enable)
3408 level2 = level;
3409 }
3410
3411 if (level1 == level2) {
3412 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
3413 return r2;
3414 else
3415 return r1;
3416 } else if (level1 > level2) {
3417 return r1;
3418 } else {
3419 return r2;
3420 }
3421 }
3422
3423 /* dirty bits used to track which watermarks need changes */
3424 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
3425 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
3426 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
3427 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
3428 #define WM_DIRTY_FBC (1 << 24)
3429 #define WM_DIRTY_DDB (1 << 25)
3430
3431 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
3432 const struct ilk_wm_values *old,
3433 const struct ilk_wm_values *new)
3434 {
3435 unsigned int dirty = 0;
3436 enum pipe pipe;
3437 int wm_lp;
3438
3439 for_each_pipe(dev_priv, pipe) {
3440 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
3441 dirty |= WM_DIRTY_LINETIME(pipe);
3442 /* Must disable LP1+ watermarks too */
3443 dirty |= WM_DIRTY_LP_ALL;
3444 }
3445
3446 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
3447 dirty |= WM_DIRTY_PIPE(pipe);
3448 /* Must disable LP1+ watermarks too */
3449 dirty |= WM_DIRTY_LP_ALL;
3450 }
3451 }
3452
3453 if (old->enable_fbc_wm != new->enable_fbc_wm) {
3454 dirty |= WM_DIRTY_FBC;
3455 /* Must disable LP1+ watermarks too */
3456 dirty |= WM_DIRTY_LP_ALL;
3457 }
3458
3459 if (old->partitioning != new->partitioning) {
3460 dirty |= WM_DIRTY_DDB;
3461 /* Must disable LP1+ watermarks too */
3462 dirty |= WM_DIRTY_LP_ALL;
3463 }
3464
3465 /* LP1+ watermarks already deemed dirty, no need to continue */
3466 if (dirty & WM_DIRTY_LP_ALL)
3467 return dirty;
3468
3469 /* Find the lowest numbered LP1+ watermark in need of an update... */
3470 for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3471 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
3472 old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
3473 break;
3474 }
3475
3476 /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
3477 for (; wm_lp <= 3; wm_lp++)
3478 dirty |= WM_DIRTY_LP(wm_lp);
3479
3480 return dirty;
3481 }
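/*
 * Example of the LP scan above: if only wm_lp[1] (the WM2 register value)
 * differs between old and new, the loop stops at wm_lp == 2 and marks
 * WM_DIRTY_LP(2) and WM_DIRTY_LP(3) dirty, leaving WM1 untouched.
 */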
3482
3483 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
3484 unsigned int dirty)
3485 {
3486 struct ilk_wm_values *previous = &dev_priv->wm.hw;
3487 bool changed = false;
3488
3489 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
3490 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
3491 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
3492 changed = true;
3493 }
3494 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
3495 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
3496 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
3497 changed = true;
3498 }
3499 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
3500 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
3501 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
3502 changed = true;
3503 }
3504
3505 /*
3506 * Don't touch WM1S_LP_EN here.
3507 * Doing so could cause underruns.
3508 */
3509
3510 return changed;
3511 }
3512
3513 /*
3514 * The spec says we shouldn't write when we don't need to, because every write
3515 * causes WMs to be re-evaluated, expending some power.
3516 */
3517 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
3518 struct ilk_wm_values *results)
3519 {
3520 struct ilk_wm_values *previous = &dev_priv->wm.hw;
3521 unsigned int dirty;
3522 uint32_t val;
3523
3524 dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
3525 if (!dirty)
3526 return;
3527
3528 _ilk_disable_lp_wm(dev_priv, dirty);
3529
3530 if (dirty & WM_DIRTY_PIPE(PIPE_A))
3531 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
3532 if (dirty & WM_DIRTY_PIPE(PIPE_B))
3533 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
3534 if (dirty & WM_DIRTY_PIPE(PIPE_C))
3535 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
3536
3537 if (dirty & WM_DIRTY_LINETIME(PIPE_A))
3538 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
3539 if (dirty & WM_DIRTY_LINETIME(PIPE_B))
3540 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
3541 if (dirty & WM_DIRTY_LINETIME(PIPE_C))
3542 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
3543
3544 if (dirty & WM_DIRTY_DDB) {
3545 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
3546 val = I915_READ(WM_MISC);
3547 if (results->partitioning == INTEL_DDB_PART_1_2)
3548 val &= ~WM_MISC_DATA_PARTITION_5_6;
3549 else
3550 val |= WM_MISC_DATA_PARTITION_5_6;
3551 I915_WRITE(WM_MISC, val);
3552 } else {
3553 val = I915_READ(DISP_ARB_CTL2);
3554 if (results->partitioning == INTEL_DDB_PART_1_2)
3555 val &= ~DISP_DATA_PARTITION_5_6;
3556 else
3557 val |= DISP_DATA_PARTITION_5_6;
3558 I915_WRITE(DISP_ARB_CTL2, val);
3559 }
3560 }
3561
3562 if (dirty & WM_DIRTY_FBC) {
3563 val = I915_READ(DISP_ARB_CTL);
3564 if (results->enable_fbc_wm)
3565 val &= ~DISP_FBC_WM_DIS;
3566 else
3567 val |= DISP_FBC_WM_DIS;
3568 I915_WRITE(DISP_ARB_CTL, val);
3569 }
3570
3571 if (dirty & WM_DIRTY_LP(1) &&
3572 previous->wm_lp_spr[0] != results->wm_lp_spr[0])
3573 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
3574
3575 if (INTEL_GEN(dev_priv) >= 7) {
3576 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
3577 I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
3578 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
3579 I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
3580 }
3581
3582 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
3583 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
3584 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
3585 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
3586 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
3587 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
3588
3589 dev_priv->wm.hw = *results;
3590 }
3591
3592 bool ilk_disable_lp_wm(struct drm_device *dev)
3593 {
3594 struct drm_i915_private *dev_priv = to_i915(dev);
3595
3596 return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
3597 }
3598
3599 /*
3600 * FIXME: We still don't have proper code to detect whether we need to apply the WA,
3601 * so assume we'll always need it in order to avoid underruns.
3602 */
3603 static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
3604 {
3605 struct drm_i915_private *dev_priv = to_i915(state->base.dev);
3606
3607 if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv))
3608 return true;
3609
3610 return false;
3611 }
3612
3613 static bool
3614 intel_has_sagv(struct drm_i915_private *dev_priv)
3615 {
3616 if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
3617 IS_CANNONLAKE(dev_priv))
3618 return true;
3619
3620 if (IS_SKYLAKE(dev_priv) &&
3621 dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED)
3622 return true;
3623
3624 return false;
3625 }
3626
3627 /*
3628 * SAGV dynamically adjusts the system agent voltage and clock frequencies
3629 * depending on power and performance requirements. The display engine access
3630 * to system memory is blocked during the adjustment time. Because of the
3631 * blocking time, having this enabled can cause full system hangs and/or pipe
3632 * underruns if we don't meet all of the following requirements:
3633 *
3634 * - <= 1 pipe enabled
3635 * - All planes can enable watermarks for latencies >= SAGV engine block time
3636 * - We're not using an interlaced display configuration
3637 */
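/*
 * As a rough worked example of the latency requirement checked in
 * intel_can_enable_sagv() below: with the SAGV engine block time assumed to
 * be 30 us on GEN9 and 20 us otherwise, the single enabled pipe may keep
 * SAGV on only if every visible plane has some watermark level enabled whose
 * latency is at least that long.
 */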
3638 int
3639 intel_enable_sagv(struct drm_i915_private *dev_priv)
3640 {
3641 int ret;
3642
3643 if (!intel_has_sagv(dev_priv))
3644 return 0;
3645
3646 if (dev_priv->sagv_status == I915_SAGV_ENABLED)
3647 return 0;
3648
3649 DRM_DEBUG_KMS("Enabling the SAGV\n");
3650 mutex_lock(&dev_priv->pcu_lock);
3651
3652 ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3653 GEN9_SAGV_ENABLE);
3654
3655 /* We don't need to wait for the SAGV when enabling */
3656 mutex_unlock(&dev_priv->pcu_lock);
3657
3658 /*
3659 * Some skl systems, pre-release machines in particular,
3660 * don't actually have an SAGV.
3661 */
3662 if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3663 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3664 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3665 return 0;
3666 } else if (ret < 0) {
3667 DRM_ERROR("Failed to enable the SAGV\n");
3668 return ret;
3669 }
3670
3671 dev_priv->sagv_status = I915_SAGV_ENABLED;
3672 return 0;
3673 }
3674
3675 int
3676 intel_disable_sagv(struct drm_i915_private *dev_priv)
3677 {
3678 int ret;
3679
3680 if (!intel_has_sagv(dev_priv))
3681 return 0;
3682
3683 if (dev_priv->sagv_status == I915_SAGV_DISABLED)
3684 return 0;
3685
3686 DRM_DEBUG_KMS("Disabling the SAGV\n");
3687 mutex_lock(&dev_priv->pcu_lock);
3688
3689 /* bspec says to keep retrying for at least 1 ms */
3690 ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3691 GEN9_SAGV_DISABLE,
3692 GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
3693 1);
3694 mutex_unlock(&dev_priv->pcu_lock);
3695
3696 /*
3697 * Some skl systems, pre-release machines in particular,
3698 * don't actually have an SAGV.
3699 */
3700 if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3701 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3702 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3703 return 0;
3704 } else if (ret < 0) {
3705 DRM_ERROR("Failed to disable the SAGV (%d)\n", ret);
3706 return ret;
3707 }
3708
3709 dev_priv->sagv_status = I915_SAGV_DISABLED;
3710 return 0;
3711 }
3712
3713 bool intel_can_enable_sagv(struct drm_atomic_state *state)
3714 {
3715 struct drm_device *dev = state->dev;
3716 struct drm_i915_private *dev_priv = to_i915(dev);
3717 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3718 struct intel_crtc *crtc;
3719 struct intel_plane *plane;
3720 struct intel_crtc_state *cstate;
3721 enum pipe pipe;
3722 int level, latency;
3723 int sagv_block_time_us = IS_GEN9(dev_priv) ? 30 : 20;
3724
3725 if (!intel_has_sagv(dev_priv))
3726 return false;
3727
3728 /*
3729 * SKL+ workaround: bspec recommends we disable the SAGV when we have
3730 * more than one pipe enabled
3731 *
3732 * If there are no active CRTCs, no additional checks need to be performed
3733 */
3734 if (hweight32(intel_state->active_crtcs) == 0)
3735 return true;
3736 else if (hweight32(intel_state->active_crtcs) > 1)
3737 return false;
3738
3739 /* Since we're now guaranteed to only have one active CRTC... */
3740 pipe = ffs(intel_state->active_crtcs) - 1;
3741 crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
3742 cstate = to_intel_crtc_state(crtc->base.state);
3743
3744 if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
3745 return false;
3746
3747 for_each_intel_plane_on_crtc(dev, crtc, plane) {
3748 struct skl_plane_wm *wm =
3749 &cstate->wm.skl.optimal.planes[plane->id];
3750
3751 /* Skip this plane if it's not enabled */
3752 if (!wm->wm[0].plane_en)
3753 continue;
3754
3755 /* Find the highest enabled wm level for this plane */
3756 for (level = ilk_wm_max_level(dev_priv);
3757 !wm->wm[level].plane_en; --level)
3758 { }
3759
3760 latency = dev_priv->wm.skl_latency[level];
3761
3762 if (skl_needs_memory_bw_wa(intel_state) &&
3763 plane->base.state->fb->modifier ==
3764 I915_FORMAT_MOD_X_TILED)
3765 latency += 15;
3766
3767 /*
3768 * If any plane on this pipe doesn't enable a wm level whose memory
3769 * latency is at least sagv_block_time_us, we can't enable SAGV.
3771 */
3772 if (latency < sagv_block_time_us)
3773 return false;
3774 }
3775
3776 return true;
3777 }
3778
3779 static void
3780 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
3781 const struct intel_crtc_state *cstate,
3782 struct skl_ddb_entry *alloc, /* out */
3783 int *num_active /* out */)
3784 {
3785 struct drm_atomic_state *state = cstate->base.state;
3786 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3787 struct drm_i915_private *dev_priv = to_i915(dev);
3788 struct drm_crtc *for_crtc = cstate->base.crtc;
3789 unsigned int pipe_size, ddb_size;
3790 int nth_active_pipe;
3791
3792 if (WARN_ON(!state) || !cstate->base.active) {
3793 alloc->start = 0;
3794 alloc->end = 0;
3795 *num_active = hweight32(dev_priv->active_crtcs);
3796 return;
3797 }
3798
3799 if (intel_state->active_pipe_changes)
3800 *num_active = hweight32(intel_state->active_crtcs);
3801 else
3802 *num_active = hweight32(dev_priv->active_crtcs);
3803
3804 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
3805 WARN_ON(ddb_size == 0);
3806
3807 ddb_size -= 4; /* 4 blocks for bypass path allocation */
3808
3809 /*
3810 * If the state doesn't change the active CRTCs, then there's
3811 * no need to recalculate; the existing pipe allocation limits
3812 * should remain unchanged. Note that we're safe from racing
3813 * commits since any racing commit that changes the active CRTC
3814 * list would need to grab _all_ crtc locks, including the one
3815 * we currently hold.
3816 */
3817 if (!intel_state->active_pipe_changes) {
3818 /*
3819 * alloc may be cleared by clear_intel_crtc_state,
3820 * copy from old state to be sure
3821 */
3822 *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb;
3823 return;
3824 }
3825
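/*
 * Split the remaining DDB evenly between the active pipes. As a
 * hypothetical example, with 892 blocks left after the bypass adjustment
 * and two active pipes, the second pipe is assigned blocks 446..892.
 */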
3826 nth_active_pipe = hweight32(intel_state->active_crtcs &
3827 (drm_crtc_mask(for_crtc) - 1));
3828 pipe_size = ddb_size / hweight32(intel_state->active_crtcs);
3829 alloc->start = nth_active_pipe * ddb_size / *num_active;
3830 alloc->end = alloc->start + pipe_size;
3831 }
3832
3833 static unsigned int skl_cursor_allocation(int num_active)
3834 {
3835 if (num_active == 1)
3836 return 32;
3837
3838 return 8;
3839 }
3840
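/*
 * PLANE_BUF_CFG / CUR_BUF_CFG pack the DDB start block into bits 9:0 and the
 * (inclusive) end block into bits 25:16; add one so that skl_ddb_entry holds
 * an exclusive end, with 0/0 meaning "no allocation".
 */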
3841 static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
3842 {
3843 entry->start = reg & 0x3ff;
3844 entry->end = (reg >> 16) & 0x3ff;
3845 if (entry->end)
3846 entry->end += 1;
3847 }
3848
3849 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
3850 struct skl_ddb_allocation *ddb /* out */)
3851 {
3852 struct intel_crtc *crtc;
3853
3854 memset(ddb, 0, sizeof(*ddb));
3855
3856 for_each_intel_crtc(&dev_priv->drm, crtc) {
3857 enum intel_display_power_domain power_domain;
3858 enum plane_id plane_id;
3859 enum pipe pipe = crtc->pipe;
3860
3861 power_domain = POWER_DOMAIN_PIPE(pipe);
3862 if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
3863 continue;
3864
3865 for_each_plane_id_on_crtc(crtc, plane_id) {
3866 u32 val;
3867
3868 if (plane_id != PLANE_CURSOR)
3869 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
3870 else
3871 val = I915_READ(CUR_BUF_CFG(pipe));
3872
3873 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val);
3874 }
3875
3876 intel_display_power_put(dev_priv, power_domain);
3877 }
3878 }
3879
3880 /*
3881 * Determines the downscale amount of a plane for the purposes of watermark calculations.
3882 * The bspec defines downscale amount as:
3883 *
3884 * """
3885 * Horizontal down scale amount = maximum[1, Horizontal source size /
3886 * Horizontal destination size]
3887 * Vertical down scale amount = maximum[1, Vertical source size /
3888 * Vertical destination size]
3889 * Total down scale amount = Horizontal down scale amount *
3890 * Vertical down scale amount
3891 * """
3892 *
3893 * Return value is provided in 16.16 fixed point form to retain fractional part.
3894 * Caller should take care of dividing & rounding off the value.
3895 */
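/*
 * For example, scaling a 3840x2160 source rect down to a 1920x1080
 * destination gives a ratio of 2.0 (0x20000 in 16.16 fixed point) in each
 * direction, for a total downscale amount of 4.0 (0x40000).
 */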
3896 static uint_fixed_16_16_t
3897 skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
3898 const struct intel_plane_state *pstate)
3899 {
3900 struct intel_plane *plane = to_intel_plane(pstate->base.plane);
3901 uint32_t src_w, src_h, dst_w, dst_h;
3902 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
3903 uint_fixed_16_16_t downscale_h, downscale_w;
3904
3905 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
3906 return u32_to_fixed16(0);
3907
3908 /* n.b., src is 16.16 fixed point, dst is whole integer */
3909 if (plane->id == PLANE_CURSOR) {
3910 /*
3911 * Cursors only support 0/180 degree rotation,
3912 * hence no need to account for rotation here.
3913 */
3914 src_w = pstate->base.src_w >> 16;
3915 src_h = pstate->base.src_h >> 16;
3916 dst_w = pstate->base.crtc_w;
3917 dst_h = pstate->base.crtc_h;
3918 } else {
3919 /*
3920 * Src coordinates are already rotated by 270 degrees for
3921 * the 90/270 degree plane rotation cases (to match the
3922 * GTT mapping), hence no need to account for rotation here.
3923 */
3924 src_w = drm_rect_width(&pstate->base.src) >> 16;
3925 src_h = drm_rect_height(&pstate->base.src) >> 16;
3926 dst_w = drm_rect_width(&pstate->base.dst);
3927 dst_h = drm_rect_height(&pstate->base.dst);
3928 }
3929
3930 fp_w_ratio = div_fixed16(src_w, dst_w);
3931 fp_h_ratio = div_fixed16(src_h, dst_h);
3932 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
3933 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
3934
3935 return mul_fixed16(downscale_w, downscale_h);
3936 }
3937
3938 static uint_fixed_16_16_t
3939 skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
3940 {
3941 uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1);
3942
3943 if (!crtc_state->base.enable)
3944 return pipe_downscale;
3945
3946 if (crtc_state->pch_pfit.enabled) {
3947 uint32_t src_w, src_h, dst_w, dst_h;
3948 uint32_t pfit_size = crtc_state->pch_pfit.size;
3949 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
3950 uint_fixed_16_16_t downscale_h, downscale_w;
3951
3952 src_w = crtc_state->pipe_src_w;
3953 src_h = crtc_state->pipe_src_h;
3954 dst_w = pfit_size >> 16;
3955 dst_h = pfit_size & 0xffff;
3956
3957 if (!dst_w || !dst_h)
3958 return pipe_downscale;
3959
3960 fp_w_ratio = div_fixed16(src_w, dst_w);
3961 fp_h_ratio = div_fixed16(src_h, dst_h);
3962 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
3963 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
3964
3965 pipe_downscale = mul_fixed16(downscale_w, downscale_h);
3966 }
3967
3968 return pipe_downscale;
3969 }
3970
3971 int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
3972 struct intel_crtc_state *cstate)
3973 {
3974 struct drm_crtc_state *crtc_state = &cstate->base;
3975 struct drm_atomic_state *state = crtc_state->state;
3976 struct drm_plane *plane;
3977 const struct drm_plane_state *pstate;
3978 struct intel_plane_state *intel_pstate;
3979 int crtc_clock, dotclk;
3980 uint32_t pipe_max_pixel_rate;
3981 uint_fixed_16_16_t pipe_downscale;
3982 uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
3983
3984 if (!cstate->base.enable)
3985 return 0;
3986
3987 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
3988 uint_fixed_16_16_t plane_downscale;
3989 uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8);
3990 int bpp;
3991
3992 if (!intel_wm_plane_visible(cstate,
3993 to_intel_plane_state(pstate)))
3994 continue;
3995
3996 if (WARN_ON(!pstate->fb))
3997 return -EINVAL;
3998
3999 intel_pstate = to_intel_plane_state(pstate);
4000 plane_downscale = skl_plane_downscale_amount(cstate,
4001 intel_pstate);
4002 bpp = pstate->fb->format->cpp[0] * 8;
4003 if (bpp == 64)
4004 plane_downscale = mul_fixed16(plane_downscale,
4005 fp_9_div_8);
4006
4007 max_downscale = max_fixed16(plane_downscale, max_downscale);
4008 }
4009 pipe_downscale = skl_pipe_downscale_amount(cstate);
4010
4011 pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
4012
4013 crtc_clock = crtc_state->adjusted_mode.crtc_clock;
4014 dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk;
4015
4016 if (IS_GEMINILAKE(to_i915(intel_crtc->base.dev)))
4017 dotclk *= 2;
4018
4019 pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale);
4020
4021 if (pipe_max_pixel_rate < crtc_clock) {
4022 DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n");
4023 return -EINVAL;
4024 }
4025
4026 return 0;
4027 }
4028
4029 static unsigned int
4030 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
4031 const struct drm_plane_state *pstate,
4032 int y)
4033 {
4034 struct intel_plane *plane = to_intel_plane(pstate->plane);
4035 struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
4036 uint32_t data_rate;
4037 uint32_t width = 0, height = 0;
4038 struct drm_framebuffer *fb;
4039 u32 format;
4040 uint_fixed_16_16_t down_scale_amount;
4041
4042 if (!intel_pstate->base.visible)
4043 return 0;
4044
4045 fb = pstate->fb;
4046 format = fb->format->format;
4047
4048 if (plane->id == PLANE_CURSOR)
4049 return 0;
4050 if (y && format != DRM_FORMAT_NV12)
4051 return 0;
4052
4053 /*
4054 * Src coordinates are already rotated by 270 degrees for
4055 * the 90/270 degree plane rotation cases (to match the
4056 * GTT mapping), hence no need to account for rotation here.
4057 */
4058 width = drm_rect_width(&intel_pstate->base.src) >> 16;
4059 height = drm_rect_height(&intel_pstate->base.src) >> 16;
4060
4061 /* for planar format */
4062 if (format == DRM_FORMAT_NV12) {
4063 if (y) /* y-plane data rate */
4064 data_rate = width * height *
4065 fb->format->cpp[0];
4066 else /* uv-plane data rate */
4067 data_rate = (width / 2) * (height / 2) *
4068 fb->format->cpp[1];
4069 } else {
4070 /* for packed formats */
4071 data_rate = width * height * fb->format->cpp[0];
4072 }
4073
4074 down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
4075
4076 return mul_round_up_u32_fixed16(data_rate, down_scale_amount);
4077 }
4078
4079 /*
4080 * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
4081 * an 8192x4096@32bpp framebuffer:
4082 * 3 * 4096 * 8192 * 4 < 2^32
4083 */
4084 static unsigned int
4085 skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
4086 unsigned *plane_data_rate,
4087 unsigned *plane_y_data_rate)
4088 {
4089 struct drm_crtc_state *cstate = &intel_cstate->base;
4090 struct drm_atomic_state *state = cstate->state;
4091 struct drm_plane *plane;
4092 const struct drm_plane_state *pstate;
4093 unsigned int total_data_rate = 0;
4094
4095 if (WARN_ON(!state))
4096 return 0;
4097
4098 /* Calculate and cache data rate for each plane */
4099 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
4100 enum plane_id plane_id = to_intel_plane(plane)->id;
4101 unsigned int rate;
4102
4103 /* packed/uv */
4104 rate = skl_plane_relative_data_rate(intel_cstate,
4105 pstate, 0);
4106 plane_data_rate[plane_id] = rate;
4107
4108 total_data_rate += rate;
4109
4110 /* y-plane */
4111 rate = skl_plane_relative_data_rate(intel_cstate,
4112 pstate, 1);
4113 plane_y_data_rate[plane_id] = rate;
4114
4115 total_data_rate += rate;
4116 }
4117
4118 return total_data_rate;
4119 }
4120
4121 static uint16_t
4122 skl_ddb_min_alloc(const struct drm_plane_state *pstate,
4123 const int y)
4124 {
4125 struct drm_framebuffer *fb = pstate->fb;
4126 struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
4127 uint32_t src_w, src_h;
4128 uint32_t min_scanlines = 8;
4129 uint8_t plane_bpp;
4130
4131 if (WARN_ON(!fb))
4132 return 0;
4133
4134 /* For packed formats, no y-plane, return 0 */
4135 if (y && fb->format->format != DRM_FORMAT_NV12)
4136 return 0;
4137
4138 /* For non Y-tiled formats, return 8 blocks */
4139 if (fb->modifier != I915_FORMAT_MOD_Y_TILED &&
4140 fb->modifier != I915_FORMAT_MOD_Yf_TILED &&
4141 fb->modifier != I915_FORMAT_MOD_Y_TILED_CCS &&
4142 fb->modifier != I915_FORMAT_MOD_Yf_TILED_CCS)
4143 return 8;
4144
4145 /*
4146 * Src coordinates are already rotated by 270 degrees for
4147 * the 90/270 degree plane rotation cases (to match the
4148 * GTT mapping), hence no need to account for rotation here.
4149 */
4150 src_w = drm_rect_width(&intel_pstate->base.src) >> 16;
4151 src_h = drm_rect_height(&intel_pstate->base.src) >> 16;
4152
4153 /* Halve UV plane width and height for NV12 */
4154 if (fb->format->format == DRM_FORMAT_NV12 && !y) {
4155 src_w /= 2;
4156 src_h /= 2;
4157 }
4158
4159 if (fb->format->format == DRM_FORMAT_NV12 && !y)
4160 plane_bpp = fb->format->cpp[1];
4161 else
4162 plane_bpp = fb->format->cpp[0];
4163
4164 if (drm_rotation_90_or_270(pstate->rotation)) {
4165 switch (plane_bpp) {
4166 case 1:
4167 min_scanlines = 32;
4168 break;
4169 case 2:
4170 min_scanlines = 16;
4171 break;
4172 case 4:
4173 min_scanlines = 8;
4174 break;
4175 case 8:
4176 min_scanlines = 4;
4177 break;
4178 default:
4179 WARN(1, "Unsupported pixel depth %u for rotation",
4180 plane_bpp);
4181 min_scanlines = 32;
4182 }
4183 }
4184
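/*
 * Minimum DDB allocation: roughly the number of 512-byte blocks needed to
 * hold min_scanlines lines of the plane (computed at a four-line
 * granularity), plus 3 extra blocks.
 */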
4185 return DIV_ROUND_UP((4 * src_w * plane_bpp), 512) * min_scanlines/4 + 3;
4186 }
4187
4188 static void
4189 skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active,
4190 uint16_t *minimum, uint16_t *y_minimum)
4191 {
4192 const struct drm_plane_state *pstate;
4193 struct drm_plane *plane;
4194
4195 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, &cstate->base) {
4196 enum plane_id plane_id = to_intel_plane(plane)->id;
4197
4198 if (plane_id == PLANE_CURSOR)
4199 continue;
4200
4201 if (!pstate->visible)
4202 continue;
4203
4204 minimum[plane_id] = skl_ddb_min_alloc(pstate, 0);
4205 y_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1);
4206 }
4207
4208 minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active);
4209 }
4210
4211 static int
4212 skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
4213 struct skl_ddb_allocation *ddb /* out */)
4214 {
4215 struct drm_atomic_state *state = cstate->base.state;
4216 struct drm_crtc *crtc = cstate->base.crtc;
4217 struct drm_device *dev = crtc->dev;
4218 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4219 enum pipe pipe = intel_crtc->pipe;
4220 struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
4221 uint16_t alloc_size, start;
4222 uint16_t minimum[I915_MAX_PLANES] = {};
4223 uint16_t y_minimum[I915_MAX_PLANES] = {};
4224 unsigned int total_data_rate;
4225 enum plane_id plane_id;
4226 int num_active;
4227 unsigned plane_data_rate[I915_MAX_PLANES] = {};
4228 unsigned plane_y_data_rate[I915_MAX_PLANES] = {};
4229 uint16_t total_min_blocks = 0;
4230
4231 /* Clear the partitioning for disabled planes. */
4232 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
4233 memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe]));
4234
4235 if (WARN_ON(!state))
4236 return 0;
4237
4238 if (!cstate->base.active) {
4239 alloc->start = alloc->end = 0;
4240 return 0;
4241 }
4242
4243 skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active);
4244 alloc_size = skl_ddb_entry_size(alloc);
4245 if (alloc_size == 0)
4246 return 0;
4247
4248 skl_ddb_calc_min(cstate, num_active, minimum, y_minimum);
4249
4250 /*
4251 * 1. Allocate the minimum required blocks for each active plane
4252 * and allocate the cursor; it doesn't require extra allocation
4253 * proportional to the data rate.
4254 */
4255
4256 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4257 total_min_blocks += minimum[plane_id];
4258 total_min_blocks += y_minimum[plane_id];
4259 }
4260
4261 if (total_min_blocks > alloc_size) {
4262 DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations");
4263 DRM_DEBUG_KMS("minimum required %d/%d\n", total_min_blocks,
4264 alloc_size);
4265 return -EINVAL;
4266 }
4267
4268 alloc_size -= total_min_blocks;
4269 ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR];
4270 ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
4271
4272 /*
4273 * 2. Distribute the remaining space in proportion to the amount of
4274 * data each plane needs to fetch from memory.
4275 *
4276 * FIXME: we may not allocate every single block here.
4277 */
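/*
 * For example, with 100 blocks left after the minimum allocations and two
 * planes whose relative data rates are 300 and 100, the planes receive 75
 * and 25 extra blocks respectively on top of their minimums.
 */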
4278 total_data_rate = skl_get_total_relative_data_rate(cstate,
4279 plane_data_rate,
4280 plane_y_data_rate);
4281 if (total_data_rate == 0)
4282 return 0;
4283
4284 start = alloc->start;
4285 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4286 unsigned int data_rate, y_data_rate;
4287 uint16_t plane_blocks, y_plane_blocks = 0;
4288
4289 if (plane_id == PLANE_CURSOR)
4290 continue;
4291
4292 data_rate = plane_data_rate[plane_id];
4293
4294 /*
4295 * Allocation for packed formats, or the uv-plane part of a planar format:
4296 * promote the expression to 64 bits to avoid overflow; the result is
4297 * smaller than the remaining space since data_rate / total_data_rate < 1.
4298 */
4299 plane_blocks = minimum[plane_id];
4300 plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
4301 total_data_rate);
4302
4303 /* Leave disabled planes at (0,0) */
4304 if (data_rate) {
4305 ddb->plane[pipe][plane_id].start = start;
4306 ddb->plane[pipe][plane_id].end = start + plane_blocks;
4307 }
4308
4309 start += plane_blocks;
4310
4311 /*
4312 * allocation for y_plane part of planar format:
4313 */
4314 y_data_rate = plane_y_data_rate[plane_id];
4315
4316 y_plane_blocks = y_minimum[plane_id];
4317 y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
4318 total_data_rate);
4319
4320 if (y_data_rate) {
4321 ddb->y_plane[pipe][plane_id].start = start;
4322 ddb->y_plane[pipe][plane_id].end = start + y_plane_blocks;
4323 }
4324
4325 start += y_plane_blocks;
4326 }
4327
4328 return 0;
4329 }
4330
4331 /*
4332 * The max latency should be 257 (the maximum the punit can encode is 255 and we add 2us
4333 * for the read latency) and cpp should always be <= 8, so that
4334 * should allow pixel_rate up to ~2 GHz which seems sufficient since max
4335 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
4336 */
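/*
 * Worked example for method 1 below: assuming the plane pixel rate is in kHz
 * and the latency in microseconds, a 148500 kHz pixel clock, 4 bytes per
 * pixel and a 5 us latency give 5 * 148500 * 4 / (1000 * 512) ~= 5.8 blocks
 * fetched during the latency window.
 */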
4337 static uint_fixed_16_16_t
4338 skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate,
4339 uint8_t cpp, uint32_t latency)
4340 {
4341 uint32_t wm_intermediate_val;
4342 uint_fixed_16_16_t ret;
4343
4344 if (latency == 0)
4345 return FP_16_16_MAX;
4346
4347 wm_intermediate_val = latency * pixel_rate * cpp;
4348 ret = div_fixed16(wm_intermediate_val, 1000 * 512);
4349
4350 if (INTEL_GEN(dev_priv) >= 10)
4351 ret = add_fixed16_u32(ret, 1);
4352
4353 return ret;
4354 }
4355
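/*
 * Method 2 expresses the watermark in whole scanlines: the latency is
 * converted into the number of lines it spans (rounded up using the line
 * time derived from pipe_htotal and the pixel rate) and multiplied by the
 * blocks fetched per line.
 */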
4356 static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate,
4357 uint32_t pipe_htotal,
4358 uint32_t latency,
4359 uint_fixed_16_16_t plane_blocks_per_line)
4360 {
4361 uint32_t wm_intermediate_val;
4362 uint_fixed_16_16_t ret;
4363
4364 if (latency == 0)
4365 return FP_16_16_MAX;
4366
4367 wm_intermediate_val = latency * pixel_rate;
4368 wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val,
4369 pipe_htotal * 1000);
4370 ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line);
4371 return ret;
4372 }
4373
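/*
 * Line time in microseconds: crtc_htotal pixels divided by the pixel rate
 * in kHz, i.e. crtc_htotal * 1000 / pixel_rate, returned in 16.16 fixed
 * point.
 */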
4374 static uint_fixed_16_16_t
4375 intel_get_linetime_us(struct intel_crtc_state *cstate)
4376 {
4377 uint32_t pixel_rate;
4378 uint32_t crtc_htotal;
4379 uint_fixed_16_16_t linetime_us;
4380
4381 if (!cstate->base.active)
4382 return u32_to_fixed16(0);
4383
4384 pixel_rate = cstate->pixel_rate;
4385
4386 if (WARN_ON(pixel_rate == 0))
4387 return u32_to_fixed16(0);
4388
4389 crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
4390 linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate);
4391
4392 return linetime_us;
4393 }
4394
4395 static uint32_t
4396 skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
4397 const struct intel_plane_state *pstate)
4398 {
4399 uint64_t adjusted_pixel_rate;
4400 uint_fixed_16_16_t downscale_amount;
4401
4402 /* Shouldn't reach here on disabled planes... */
4403 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
4404 return 0;
4405
4406 /*
4407 * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
4408 * with additional adjustments for plane-specific scaling.
4409 */
4410 adjusted_pixel_rate = cstate->pixel_rate;
4411 downscale_amount = skl_plane_downscale_amount(cstate, pstate);
4412
4413 return mul_round_up_u32_fixed16(adjusted_pixel_rate,
4414 downscale_amount);
4415 }
4416
4417 static int
4418 skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
4419 struct intel_crtc_state *cstate,
4420 const struct intel_plane_state *intel_pstate,
4421 struct skl_wm_params *wp)
4422 {
4423 struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
4424 const struct drm_plane_state *pstate = &intel_pstate->base;
4425 const struct drm_framebuffer *fb = pstate->fb;
4426 uint32_t interm_pbpl;
4427 struct intel_atomic_state *state =
4428 to_intel_atomic_state(cstate->base.state);
4429 bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
4430
4431 if (!intel_wm_plane_visible(cstate, intel_pstate))
4432 return 0;
4433
4434 wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
4435 fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
4436 fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4437 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4438 wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
4439 wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4440 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4441
4442 if (plane->id == PLANE_CURSOR) {
4443 wp->width = intel_pstate->base.crtc_w;
4444 } else {
4445 /*
4446 * Src coordinates are already rotated by 270 degrees for
4447 * the 90/270 degree plane rotation cases (to match the
4448 * GTT mapping), hence no need to account for rotation here.
4449 */
4450 wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
4451 }
4452
4453 wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
4454 fb->format->cpp[0];
4455 wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
4456 intel_pstate);
4457
4458 if (drm_rotation_90_or_270(pstate->rotation)) {
4459
4460 switch (wp->cpp) {
4461 case 1:
4462 wp->y_min_scanlines = 16;
4463 break;
4464 case 2:
4465 wp->y_min_scanlines = 8;
4466 break;
4467 case 4:
4468 wp->y_min_scanlines = 4;
4469 break;
4470 default:
4471 MISSING_CASE(wp->cpp);
4472 return -EINVAL;
4473 }
4474 } else {
4475 wp->y_min_scanlines = 4;
4476 }
4477
4478 if (apply_memory_bw_wa)
4479 wp->y_min_scanlines *= 2;
4480
4481 wp->plane_bytes_per_line = wp->width * wp->cpp;
4482 if (wp->y_tiled) {
4483 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
4484 wp->y_min_scanlines, 512);
4485
4486 if (INTEL_GEN(dev_priv) >= 10)
4487 interm_pbpl++;
4488
4489 wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
4490 wp->y_min_scanlines);
4491 } else if (wp->x_tiled && IS_GEN9(dev_priv)) {
4492 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512);
4493 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4494 } else {
4495 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1;
4496 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4497 }
4498
4499 wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
4500 wp->plane_blocks_per_line);
4501 wp->linetime_us = fixed16_to_u32_round_up(
4502 intel_get_linetime_us(cstate));
4503
4504 return 0;
4505 }
4506
4507 static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
4508 struct intel_crtc_state *cstate,
4509 const struct intel_plane_state *intel_pstate,
4510 uint16_t ddb_allocation,
4511 int level,
4512 const struct skl_wm_params *wp,
4513 uint16_t *out_blocks, /* out */
4514 uint8_t *out_lines, /* out */
4515 bool *enabled /* out */)
4516 {
4517 const struct drm_plane_state *pstate = &intel_pstate->base;
4518 uint32_t latency = dev_priv->wm.skl_latency[level];
4519 uint_fixed_16_16_t method1, method2;
4520 uint_fixed_16_16_t selected_result;
4521 uint32_t res_blocks, res_lines;
4522 struct intel_atomic_state *state =
4523 to_intel_atomic_state(cstate->base.state);
4524 bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
4525
4526 if (latency == 0 ||
4527 !intel_wm_plane_visible(cstate, intel_pstate)) {
4528 *enabled = false;
4529 return 0;
4530 }
4531
4532 /* Display WA #1141: kbl,cfl */
4533 if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
4534 IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) &&
4535 dev_priv->ipc_enabled)
4536 latency += 4;
4537
4538 if (apply_memory_bw_wa && wp->x_tiled)
4539 latency += 15;
4540
4541 method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
4542 wp->cpp, latency);
4543 method2 = skl_wm_method2(wp->plane_pixel_rate,
4544 cstate->base.adjusted_mode.crtc_htotal,
4545 latency,
4546 wp->plane_blocks_per_line);
4547
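/*
 * Select the watermark method: Y-tiled surfaces always use the line-based
 * method 2, bounded below by the Y-tile minimum. Other surfaces use method 2
 * when less than one block is fetched per line, the smaller of the two
 * methods when there is enough DDB space or the latency spans at least a
 * full line time, and method 1 otherwise.
 */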
4548 if (wp->y_tiled) {
4549 selected_result = max_fixed16(method2, wp->y_tile_minimum);
4550 } else {
4551 if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
4552 512 < 1) && (wp->plane_bytes_per_line / 512 < 1))
4553 selected_result = method2;
4554 else if (ddb_allocation >=
4555 fixed16_to_u32_round_up(wp->plane_blocks_per_line))
4556 selected_result = min_fixed16(method1, method2);
4557 else if (latency >= wp->linetime_us)
4558 selected_result = min_fixed16(method1, method2);
4559 else
4560 selected_result = method1;
4561 }
4562
4563 res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
4564 res_lines = div_round_up_fixed16(selected_result,
4565 wp->plane_blocks_per_line);
4566
4567 /* Display WA #1125: skl,bxt,kbl,glk */
4568 if (level == 0 && wp->rc_surface)
4569 res_blocks += fixed16_to_u32_round_up(wp->y_tile_minimum);
4570
4571 /* Display WA #1126: skl,bxt,kbl,glk */
4572 if (level >= 1 && level <= 7) {
4573 if (wp->y_tiled) {
4574 res_blocks += fixed16_to_u32_round_up(
4575 wp->y_tile_minimum);
4576 res_lines += wp->y_min_scanlines;
4577 } else {
4578 res_blocks++;
4579 }
4580 }
4581
4582 if (res_blocks >= ddb_allocation || res_lines > 31) {
4583 *enabled = false;
4584
4585 /*
4586 * If there are no valid level 0 watermarks, then we can't
4587 * support this display configuration.
4588 */
4589 if (level) {
4590 return 0;
4591 } else {
4592 struct drm_plane *plane = pstate->plane;
4593
4594 DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n");
4595 DRM_DEBUG_KMS("[PLANE:%d:%s] blocks required = %u/%u, lines required = %u/31\n",
4596 plane->base.id, plane->name,
4597 res_blocks, ddb_allocation, res_lines);
4598 return -EINVAL;
4599 }
4600 }
4601
4602 *out_blocks = res_blocks;
4603 *out_lines = res_lines;
4604 *enabled = true;
4605
4606 return 0;
4607 }
4608
4609 static int
4610 skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
4611 struct skl_ddb_allocation *ddb,
4612 struct intel_crtc_state *cstate,
4613 const struct intel_plane_state *intel_pstate,
4614 const struct skl_wm_params *wm_params,
4615 struct skl_plane_wm *wm)
4616 {
4617 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
4618 struct drm_plane *plane = intel_pstate->base.plane;
4619 struct intel_plane *intel_plane = to_intel_plane(plane);
4620 uint16_t ddb_blocks;
4621 enum pipe pipe = intel_crtc->pipe;
4622 int level, max_level = ilk_wm_max_level(dev_priv);
4623 int ret;
4624
4625 if (WARN_ON(!intel_pstate->base.fb))
4626 return -EINVAL;
4627
4628 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]);
4629
4630 for (level = 0; level <= max_level; level++) {
4631 struct skl_wm_level *result = &wm->wm[level];
4632
4633 ret = skl_compute_plane_wm(dev_priv,
4634 cstate,
4635 intel_pstate,
4636 ddb_blocks,
4637 level,
4638 wm_params,
4639 &result->plane_res_b,
4640 &result->plane_res_l,
4641 &result->plane_en);
4642 if (ret)
4643 return ret;
4644 }
4645
4646 return 0;
4647 }
4648
4649 static uint32_t
4650 skl_compute_linetime_wm(struct intel_crtc_state *cstate)
4651 {
4652 struct drm_atomic_state *state = cstate->base.state;
4653 struct drm_i915_private *dev_priv = to_i915(state->dev);
4654 uint_fixed_16_16_t linetime_us;
4655 uint32_t linetime_wm;
4656
4657 linetime_us = intel_get_linetime_us(cstate);
4658
4659 if (is_fixed16_zero(linetime_us))
4660 return 0;
4661
4662 linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
4663
4664 /* Display WA #1135: bxt:ALL GLK:ALL */
4665 if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) &&
4666 dev_priv->ipc_enabled)
4667 linetime_wm /= 2;
4668
4669 return linetime_wm;
4670 }
4671
4672 static void skl_compute_transition_wm(struct intel_crtc_state *cstate,
4673 struct skl_wm_params *wp,
4674 struct skl_wm_level *wm_l0,
4675 uint16_t ddb_allocation,
4676 struct skl_wm_level *trans_wm /* out */)
4677 {
4678 struct drm_device *dev = cstate->base.crtc->dev;
4679 const struct drm_i915_private *dev_priv = to_i915(dev);
4680 uint16_t trans_min, trans_y_tile_min;
4681 const uint16_t trans_amount = 10; /* This is a configurable amount */
4682 uint16_t trans_offset_b, res_blocks;
4683
4684 if (!cstate->base.active)
4685 goto exit;
4686
4687 /* Transition WMs are not recommended by the HW team for GEN9 */
4688 if (INTEL_GEN(dev_priv) <= 9)
4689 goto exit;
4690
4691 /* Transition WMs don't make any sense if IPC is disabled */
4692 if (!dev_priv->ipc_enabled)
4693 goto exit;
4694
4695 if (INTEL_GEN(dev_priv) >= 10)
4696 trans_min = 4;
4697
4698 trans_offset_b = trans_min + trans_amount;
4699
4700 if (wp->y_tiled) {
4701 trans_y_tile_min = (uint16_t) mul_round_up_u32_fixed16(2,
4702 wp->y_tile_minimum);
4703 res_blocks = max(wm_l0->plane_res_b, trans_y_tile_min) +
4704 trans_offset_b;
4705 } else {
4706 res_blocks = wm_l0->plane_res_b + trans_offset_b;
4707
4708 /* WA BUG:1938466 add one block for non y-tile planes */
4709 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
4710 res_blocks += 1;
4711
4712 }
4713
4714 res_blocks += 1;
4715
4716 if (res_blocks < ddb_allocation) {
4717 trans_wm->plane_res_b = res_blocks;
4718 trans_wm->plane_en = true;
4719 return;
4720 }
4721
4722 exit:
4723 trans_wm->plane_en = false;
4724 }
4725
4726 static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
4727 struct skl_ddb_allocation *ddb,
4728 struct skl_pipe_wm *pipe_wm)
4729 {
4730 struct drm_device *dev = cstate->base.crtc->dev;
4731 struct drm_crtc_state *crtc_state = &cstate->base;
4732 const struct drm_i915_private *dev_priv = to_i915(dev);
4733 struct drm_plane *plane;
4734 const struct drm_plane_state *pstate;
4735 struct skl_plane_wm *wm;
4736 int ret;
4737
4738 /*
4739 * We'll only calculate watermarks for planes that are actually
4740 * enabled, so make sure all other planes are set as disabled.
4741 */
4742 memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
4743
4744 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4745 const struct intel_plane_state *intel_pstate =
4746 to_intel_plane_state(pstate);
4747 enum plane_id plane_id = to_intel_plane(plane)->id;
4748 struct skl_wm_params wm_params;
4749 enum pipe pipe = to_intel_crtc(cstate->base.crtc)->pipe;
4750 uint16_t ddb_blocks;
4751
4752 wm = &pipe_wm->planes[plane_id];
4753 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][plane_id]);
4754 memset(&wm_params, 0, sizeof(struct skl_wm_params));
4755
4756 ret = skl_compute_plane_wm_params(dev_priv, cstate,
4757 intel_pstate, &wm_params);
4758 if (ret)
4759 return ret;
4760
4761 ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
4762 intel_pstate, &wm_params, wm);
4763 if (ret)
4764 return ret;
4765 skl_compute_transition_wm(cstate, &wm_params, &wm->wm[0],
4766 ddb_blocks, &wm->trans_wm);
4767 }
4768 pipe_wm->linetime = skl_compute_linetime_wm(cstate);
4769
4770 return 0;
4771 }
4772
4773 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
4774 i915_reg_t reg,
4775 const struct skl_ddb_entry *entry)
4776 {
4777 if (entry->end)
4778 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
4779 else
4780 I915_WRITE(reg, 0);
4781 }
4782
4783 static void skl_write_wm_level(struct drm_i915_private *dev_priv,
4784 i915_reg_t reg,
4785 const struct skl_wm_level *level)
4786 {
4787 uint32_t val = 0;
4788
4789 if (level->plane_en) {
4790 val |= PLANE_WM_EN;
4791 val |= level->plane_res_b;
4792 val |= level->plane_res_l << PLANE_WM_LINES_SHIFT;
4793 }
4794
4795 I915_WRITE(reg, val);
4796 }
4797
4798 static void skl_write_plane_wm(struct intel_crtc *intel_crtc,
4799 const struct skl_plane_wm *wm,
4800 const struct skl_ddb_allocation *ddb,
4801 enum plane_id plane_id)
4802 {
4803 struct drm_crtc *crtc = &intel_crtc->base;
4804 struct drm_device *dev = crtc->dev;
4805 struct drm_i915_private *dev_priv = to_i915(dev);
4806 int level, max_level = ilk_wm_max_level(dev_priv);
4807 enum pipe pipe = intel_crtc->pipe;
4808
4809 for (level = 0; level <= max_level; level++) {
4810 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
4811 &wm->wm[level]);
4812 }
4813 skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
4814 &wm->trans_wm);
4815
4816 skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
4817 &ddb->plane[pipe][plane_id]);
4818 skl_ddb_entry_write(dev_priv, PLANE_NV12_BUF_CFG(pipe, plane_id),
4819 &ddb->y_plane[pipe][plane_id]);
4820 }
4821
4822 static void skl_write_cursor_wm(struct intel_crtc *intel_crtc,
4823 const struct skl_plane_wm *wm,
4824 const struct skl_ddb_allocation *ddb)
4825 {
4826 struct drm_crtc *crtc = &intel_crtc->base;
4827 struct drm_device *dev = crtc->dev;
4828 struct drm_i915_private *dev_priv = to_i915(dev);
4829 int level, max_level = ilk_wm_max_level(dev_priv);
4830 enum pipe pipe = intel_crtc->pipe;
4831
4832 for (level = 0; level <= max_level; level++) {
4833 skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
4834 &wm->wm[level]);
4835 }
4836 skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
4837
4838 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
4839 &ddb->plane[pipe][PLANE_CURSOR]);
4840 }
4841
4842 bool skl_wm_level_equals(const struct skl_wm_level *l1,
4843 const struct skl_wm_level *l2)
4844 {
4845 if (l1->plane_en != l2->plane_en)
4846 return false;
4847
4848 /* If both planes aren't enabled, the rest shouldn't matter */
4849 if (!l1->plane_en)
4850 return true;
4851
4852 return (l1->plane_res_l == l2->plane_res_l &&
4853 l1->plane_res_b == l2->plane_res_b);
4854 }
4855
4856 static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
4857 const struct skl_ddb_entry *b)
4858 {
4859 return a->start < b->end && b->start < a->end;
4860 }
4861
4862 bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv,
4863 const struct skl_ddb_entry **entries,
4864 const struct skl_ddb_entry *ddb,
4865 int ignore)
4866 {
4867 enum pipe pipe;
4868
4869 for_each_pipe(dev_priv, pipe) {
4870 if (pipe != ignore && entries[pipe] &&
4871 skl_ddb_entries_overlap(ddb, entries[pipe]))
4872 return true;
4873 }
4874
4875 return false;
4876 }
4877
4878 static int skl_update_pipe_wm(struct drm_crtc_state *cstate,
4879 const struct skl_pipe_wm *old_pipe_wm,
4880 struct skl_pipe_wm *pipe_wm, /* out */
4881 struct skl_ddb_allocation *ddb, /* out */
4882 bool *changed /* out */)
4883 {
4884 struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate);
4885 int ret;
4886
4887 ret = skl_build_pipe_wm(intel_cstate, ddb, pipe_wm);
4888 if (ret)
4889 return ret;
4890
4891 if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm)))
4892 *changed = false;
4893 else
4894 *changed = true;
4895
4896 return 0;
4897 }
4898
4899 static uint32_t
4900 pipes_modified(struct drm_atomic_state *state)
4901 {
4902 struct drm_crtc *crtc;
4903 struct drm_crtc_state *cstate;
4904 uint32_t i, ret = 0;
4905
4906 for_each_new_crtc_in_state(state, crtc, cstate, i)
4907 ret |= drm_crtc_mask(crtc);
4908
4909 return ret;
4910 }
4911
4912 static int
4913 skl_ddb_add_affected_planes(struct intel_crtc_state *cstate)
4914 {
4915 struct drm_atomic_state *state = cstate->base.state;
4916 struct drm_device *dev = state->dev;
4917 struct drm_crtc *crtc = cstate->base.crtc;
4918 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4919 struct drm_i915_private *dev_priv = to_i915(dev);
4920 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
4921 struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
4922 struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
4923 struct drm_plane_state *plane_state;
4924 struct drm_plane *plane;
4925 enum pipe pipe = intel_crtc->pipe;
4926
4927 WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc));
4928
4929 drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) {
4930 enum plane_id plane_id = to_intel_plane(plane)->id;
4931
4932 if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id],
4933 &new_ddb->plane[pipe][plane_id]) &&
4934 skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id],
4935 &new_ddb->y_plane[pipe][plane_id]))
4936 continue;
4937
4938 plane_state = drm_atomic_get_plane_state(state, plane);
4939 if (IS_ERR(plane_state))
4940 return PTR_ERR(plane_state);
4941 }
4942
4943 return 0;
4944 }
4945
4946 static int
4947 skl_compute_ddb(struct drm_atomic_state *state)
4948 {
4949 struct drm_device *dev = state->dev;
4950 struct drm_i915_private *dev_priv = to_i915(dev);
4951 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
4952 struct intel_crtc *intel_crtc;
4953 struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb;
4954 uint32_t realloc_pipes = pipes_modified(state);
4955 int ret;
4956
4957 /*
4958 * If this is our first atomic update following hardware readout,
4959 * we can't trust the DDB that the BIOS programmed for us. Let's
4960 * pretend that all pipes switched active status so that we'll
4961 * ensure a full DDB recompute.
4962 */
4963 if (dev_priv->wm.distrust_bios_wm) {
4964 ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
4965 state->acquire_ctx);
4966 if (ret)
4967 return ret;
4968
4969 intel_state->active_pipe_changes = ~0;
4970
4971 /*
4972 * We usually only initialize intel_state->active_crtcs if
4973 * we're doing a modeset; make sure this field is always
4974 * initialized during the sanitization process that happens
4975 * on the first commit too.
4976 */
4977 if (!intel_state->modeset)
4978 intel_state->active_crtcs = dev_priv->active_crtcs;
4979 }
4980
4981 /*
4982 * If the modeset changes which CRTCs are active, we need to
4983 * recompute the DDB allocation for *all* active pipes, even
4984 * those that weren't otherwise being modified in any way by this
4985 * atomic commit. Due to the shrinking of the per-pipe allocations
4986 * when new active CRTCs are added, it's possible for a pipe that
4987 * we were already using and aren't changing at all here to suddenly
4988 * become invalid if its DDB needs exceed its new allocation.
4989 *
4990 * Note that if we wind up doing a full DDB recompute, we can't let
4991 * any other display updates race with this transaction, so we need
4992 * to grab the lock on *all* CRTCs.
4993 */
4994 if (intel_state->active_pipe_changes) {
4995 realloc_pipes = ~0;
4996 intel_state->wm_results.dirty_pipes = ~0;
4997 }
4998
4999 /*
5000 * We're not recomputing for the pipes not included in the commit, so
5001 * make sure we start with the current state.
5002 */
5003 memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
5004
5005 for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
5006 struct intel_crtc_state *cstate;
5007
5008 cstate = intel_atomic_get_crtc_state(state, intel_crtc);
5009 if (IS_ERR(cstate))
5010 return PTR_ERR(cstate);
5011
5012 ret = skl_allocate_pipe_ddb(cstate, ddb);
5013 if (ret)
5014 return ret;
5015
5016 ret = skl_ddb_add_affected_planes(cstate);
5017 if (ret)
5018 return ret;
5019 }
5020
5021 return 0;
5022 }
5023
5024 static void
5025 skl_copy_wm_for_pipe(struct skl_wm_values *dst,
5026 struct skl_wm_values *src,
5027 enum pipe pipe)
5028 {
5029 memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe],
5030 sizeof(dst->ddb.y_plane[pipe]));
5031 memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe],
5032 sizeof(dst->ddb.plane[pipe]));
5033 }
5034
5035 static void
5036 skl_print_wm_changes(const struct drm_atomic_state *state)
5037 {
5038 const struct drm_device *dev = state->dev;
5039 const struct drm_i915_private *dev_priv = to_i915(dev);
5040 const struct intel_atomic_state *intel_state =
5041 to_intel_atomic_state(state);
5042 const struct drm_crtc *crtc;
5043 const struct drm_crtc_state *cstate;
5044 const struct intel_plane *intel_plane;
5045 const struct skl_ddb_allocation *old_ddb = &dev_priv->wm.skl_hw.ddb;
5046 const struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
5047 int i;
5048
5049 for_each_new_crtc_in_state(state, crtc, cstate, i) {
5050 const struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5051 enum pipe pipe = intel_crtc->pipe;
5052
5053 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
5054 enum plane_id plane_id = intel_plane->id;
5055 const struct skl_ddb_entry *old, *new;
5056
5057 old = &old_ddb->plane[pipe][plane_id];
5058 new = &new_ddb->plane[pipe][plane_id];
5059
5060 if (skl_ddb_entry_equal(old, new))
5061 continue;
5062
5063 DRM_DEBUG_ATOMIC("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n",
5064 intel_plane->base.base.id,
5065 intel_plane->base.name,
5066 old->start, old->end,
5067 new->start, new->end);
5068 }
5069 }
5070 }
5071
5072 static int
5073 skl_compute_wm(struct drm_atomic_state *state)
5074 {
5075 struct drm_crtc *crtc;
5076 struct drm_crtc_state *cstate;
5077 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
5078 struct skl_wm_values *results = &intel_state->wm_results;
5079 struct drm_device *dev = state->dev;
5080 struct skl_pipe_wm *pipe_wm;
5081 bool changed = false;
5082 int ret, i;
5083
5084 /*
5085 * When we distrust bios wm we always need to recompute to set the
5086 * expected DDB allocations for each CRTC.
5087 */
5088 if (to_i915(dev)->wm.distrust_bios_wm)
5089 changed = true;
5090
5091 /*
5092 * If this transaction isn't actually touching any CRTCs, don't
5093 * bother with watermark calculation. Note that if we pass this
5094 * test, we're guaranteed to hold at least one CRTC state mutex,
5095 * which means we can safely use values like dev_priv->active_crtcs
5096 * since any racing commits that want to update them would need to
5097 * hold _all_ CRTC state mutexes.
5098 */
5099 for_each_new_crtc_in_state(state, crtc, cstate, i)
5100 changed = true;
5101
5102 if (!changed)
5103 return 0;
5104
5105 /* Clear all dirty flags */
5106 results->dirty_pipes = 0;
5107
5108 ret = skl_compute_ddb(state);
5109 if (ret)
5110 return ret;
5111
5112 /*
5113 * Calculate WMs for all pipes that are part of this transaction.
5114 * Note that the DDB allocation above may have added more CRTCs that
5115 * weren't otherwise being modified (and set bits in dirty_pipes) if
5116 * pipe allocations had to change.
5117 *
5118 * FIXME: Now that we're doing this in the atomic check phase, we
5119 * should allow skl_update_pipe_wm() to return failure in cases where
5120 * no suitable watermark values can be found.
5121 */
5122 for_each_new_crtc_in_state(state, crtc, cstate, i) {
5123 struct intel_crtc_state *intel_cstate =
5124 to_intel_crtc_state(cstate);
5125 const struct skl_pipe_wm *old_pipe_wm =
5126 &to_intel_crtc_state(crtc->state)->wm.skl.optimal;
5127
5128 pipe_wm = &intel_cstate->wm.skl.optimal;
5129 ret = skl_update_pipe_wm(cstate, old_pipe_wm, pipe_wm,
5130 &results->ddb, &changed);
5131 if (ret)
5132 return ret;
5133
5134 if (changed)
5135 results->dirty_pipes |= drm_crtc_mask(crtc);
5136
5137 if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0)
5138 /* This pipe's WM's did not change */
5139 continue;
5140
5141 intel_cstate->update_wm_pre = true;
5142 }
5143
5144 skl_print_wm_changes(state);
5145
5146 return 0;
5147 }
5148
5149 static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
5150 struct intel_crtc_state *cstate)
5151 {
5152 struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
5153 struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5154 struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
5155 const struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
5156 enum pipe pipe = crtc->pipe;
5157 enum plane_id plane_id;
5158
5159 if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
5160 return;
5161
5162 I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
5163
5164 for_each_plane_id_on_crtc(crtc, plane_id) {
5165 if (plane_id != PLANE_CURSOR)
5166 skl_write_plane_wm(crtc, &pipe_wm->planes[plane_id],
5167 ddb, plane_id);
5168 else
5169 skl_write_cursor_wm(crtc, &pipe_wm->planes[plane_id],
5170 ddb);
5171 }
5172 }
5173
5174 static void skl_initial_wm(struct intel_atomic_state *state,
5175 struct intel_crtc_state *cstate)
5176 {
5177 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5178 struct drm_device *dev = intel_crtc->base.dev;
5179 struct drm_i915_private *dev_priv = to_i915(dev);
5180 struct skl_wm_values *results = &state->wm_results;
5181 struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw;
5182 enum pipe pipe = intel_crtc->pipe;
5183
5184 if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
5185 return;
5186
5187 mutex_lock(&dev_priv->wm.wm_mutex);
5188
5189 if (cstate->base.active_changed)
5190 skl_atomic_update_crtc_wm(state, cstate);
5191
5192 skl_copy_wm_for_pipe(hw_vals, results, pipe);
5193
5194 mutex_unlock(&dev_priv->wm.wm_mutex);
5195 }
5196
5197 static void ilk_compute_wm_config(struct drm_device *dev,
5198 struct intel_wm_config *config)
5199 {
5200 struct intel_crtc *crtc;
5201
5202 /* Compute the currently _active_ config */
5203 for_each_intel_crtc(dev, crtc) {
5204 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
5205
5206 if (!wm->pipe_enabled)
5207 continue;
5208
5209 config->sprites_enabled |= wm->sprites_enabled;
5210 config->sprites_scaled |= wm->sprites_scaled;
5211 config->num_pipes_active++;
5212 }
5213 }
5214
5215 static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
5216 {
5217 struct drm_device *dev = &dev_priv->drm;
5218 struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
5219 struct ilk_wm_maximums max;
5220 struct intel_wm_config config = {};
5221 struct ilk_wm_values results = {};
5222 enum intel_ddb_partitioning partitioning;
5223
5224 ilk_compute_wm_config(dev, &config);
5225
5226 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
5227 ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
5228
5229 /* 5/6 split only in single pipe config on IVB+ */
5230 if (INTEL_GEN(dev_priv) >= 7 &&
5231 config.num_pipes_active == 1 && config.sprites_enabled) {
5232 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
5233 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
5234
5235 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
5236 } else {
5237 best_lp_wm = &lp_wm_1_2;
5238 }
5239
5240 partitioning = (best_lp_wm == &lp_wm_1_2) ?
5241 INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
5242
5243 ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
5244
5245 ilk_write_wm_values(dev_priv, &results);
5246 }
5247
5248 static void ilk_initial_watermarks(struct intel_atomic_state *state,
5249 struct intel_crtc_state *cstate)
5250 {
5251 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5252 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5253
5254 mutex_lock(&dev_priv->wm.wm_mutex);
5255 intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate;
5256 ilk_program_watermarks(dev_priv);
5257 mutex_unlock(&dev_priv->wm.wm_mutex);
5258 }
5259
5260 static void ilk_optimize_watermarks(struct intel_atomic_state *state,
5261 struct intel_crtc_state *cstate)
5262 {
5263 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5264 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5265
5266 mutex_lock(&dev_priv->wm.wm_mutex);
5267 if (cstate->wm.need_postvbl_update) {
5268 intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal;
5269 ilk_program_watermarks(dev_priv);
5270 }
5271 mutex_unlock(&dev_priv->wm.wm_mutex);
5272 }
5273
5274 static inline void skl_wm_level_from_reg_val(uint32_t val,
5275 struct skl_wm_level *level)
5276 {
5277 level->plane_en = val & PLANE_WM_EN;
5278 level->plane_res_b = val & PLANE_WM_BLOCKS_MASK;
5279 level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) &
5280 PLANE_WM_LINES_MASK;
5281 }
5282
5283 void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc,
5284 struct skl_pipe_wm *out)
5285 {
5286 struct drm_i915_private *dev_priv = to_i915(crtc->dev);
5287 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5288 enum pipe pipe = intel_crtc->pipe;
5289 int level, max_level;
5290 enum plane_id plane_id;
5291 uint32_t val;
5292
5293 max_level = ilk_wm_max_level(dev_priv);
5294
5295 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
5296 struct skl_plane_wm *wm = &out->planes[plane_id];
5297
5298 for (level = 0; level <= max_level; level++) {
5299 if (plane_id != PLANE_CURSOR)
5300 val = I915_READ(PLANE_WM(pipe, plane_id, level));
5301 else
5302 val = I915_READ(CUR_WM(pipe, level));
5303
5304 skl_wm_level_from_reg_val(val, &wm->wm[level]);
5305 }
5306
5307 if (plane_id != PLANE_CURSOR)
5308 val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
5309 else
5310 val = I915_READ(CUR_WM_TRANS(pipe));
5311
5312 skl_wm_level_from_reg_val(val, &wm->trans_wm);
5313 }
5314
5315 if (!intel_crtc->active)
5316 return;
5317
5318 out->linetime = I915_READ(PIPE_WM_LINETIME(pipe));
5319 }
5320
5321 void skl_wm_get_hw_state(struct drm_device *dev)
5322 {
5323 struct drm_i915_private *dev_priv = to_i915(dev);
5324 struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
5325 struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
5326 struct drm_crtc *crtc;
5327 struct intel_crtc *intel_crtc;
5328 struct intel_crtc_state *cstate;
5329
5330 skl_ddb_get_hw_state(dev_priv, ddb);
5331 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
5332 intel_crtc = to_intel_crtc(crtc);
5333 cstate = to_intel_crtc_state(crtc->state);
5334
5335 skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal);
5336
5337 if (intel_crtc->active)
5338 hw->dirty_pipes |= drm_crtc_mask(crtc);
5339 }
5340
5341 if (dev_priv->active_crtcs) {
5342 /* Fully recompute DDB on first atomic commit */
5343 dev_priv->wm.distrust_bios_wm = true;
5344 } else {
5345 /* Easy/common case; just sanitize DDB now if everything is off */
5346 memset(ddb, 0, sizeof(*ddb));
5347 }
5348 }
5349
5350 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
5351 {
5352 struct drm_device *dev = crtc->dev;
5353 struct drm_i915_private *dev_priv = to_i915(dev);
5354 struct ilk_wm_values *hw = &dev_priv->wm.hw;
5355 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5356 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
5357 struct intel_pipe_wm *active = &cstate->wm.ilk.optimal;
5358 enum pipe pipe = intel_crtc->pipe;
5359 static const i915_reg_t wm0_pipe_reg[] = {
5360 [PIPE_A] = WM0_PIPEA_ILK,
5361 [PIPE_B] = WM0_PIPEB_ILK,
5362 [PIPE_C] = WM0_PIPEC_IVB,
5363 };
5364
5365 hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
5366 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
5367 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
5368
5369 memset(active, 0, sizeof(*active));
5370
5371 active->pipe_enabled = intel_crtc->active;
5372
5373 if (active->pipe_enabled) {
5374 u32 tmp = hw->wm_pipe[pipe];
5375
5376 /*
5377 * For active pipes LP0 watermark is marked as
5378 * enabled, and LP1+ watermarks as disabled since
5379 * we can't really reverse compute them in case
5380 * multiple pipes are active.
5381 */
5382 active->wm[0].enable = true;
5383 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
5384 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
5385 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
5386 active->linetime = hw->wm_linetime[pipe];
5387 } else {
5388 int level, max_level = ilk_wm_max_level(dev_priv);
5389
5390 /*
5391 * For inactive pipes, all watermark levels
5392 * should be marked as enabled but zeroed,
5393 * which is what we'd compute them to.
5394 */
5395 for (level = 0; level <= max_level; level++)
5396 active->wm[level].enable = true;
5397 }
5398
5399 intel_crtc->wm.active.ilk = *active;
5400 }
5401
5402 #define _FW_WM(value, plane) \
5403 (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
5404 #define _FW_WM_VLV(value, plane) \
5405 (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
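/*
 * For example, _FW_WM(tmp, SR) expands via token pasting to
 * (((tmp) & DSPFW_SR_MASK) >> DSPFW_SR_SHIFT), i.e. it extracts the
 * SR watermark field from a DSPFW register value; _FW_WM_VLV does the
 * same using the VLV-specific field masks.
 */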
5406
5407 static void g4x_read_wm_values(struct drm_i915_private *dev_priv,
5408 struct g4x_wm_values *wm)
5409 {
5410 uint32_t tmp;
5411
5412 tmp = I915_READ(DSPFW1);
5413 wm->sr.plane = _FW_WM(tmp, SR);
5414 wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5415 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB);
5416 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA);
5417
5418 tmp = I915_READ(DSPFW2);
5419 wm->fbc_en = tmp & DSPFW_FBC_SR_EN;
5420 wm->sr.fbc = _FW_WM(tmp, FBC_SR);
5421 wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR);
5422 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB);
5423 wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5424 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA);
5425
5426 tmp = I915_READ(DSPFW3);
5427 wm->hpll_en = tmp & DSPFW_HPLL_SR_EN;
5428 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5429 wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR);
5430 wm->hpll.plane = _FW_WM(tmp, HPLL_SR);
5431 }
5432
5433 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
5434 struct vlv_wm_values *wm)
5435 {
5436 enum pipe pipe;
5437 uint32_t tmp;
5438
5439 for_each_pipe(dev_priv, pipe) {
5440 tmp = I915_READ(VLV_DDL(pipe));
5441
5442 wm->ddl[pipe].plane[PLANE_PRIMARY] =
5443 (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5444 wm->ddl[pipe].plane[PLANE_CURSOR] =
5445 (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5446 wm->ddl[pipe].plane[PLANE_SPRITE0] =
5447 (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5448 wm->ddl[pipe].plane[PLANE_SPRITE1] =
5449 (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5450 }
5451
5452 tmp = I915_READ(DSPFW1);
5453 wm->sr.plane = _FW_WM(tmp, SR);
5454 wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5455 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB);
5456 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA);
5457
5458 tmp = I915_READ(DSPFW2);
5459 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB);
5460 wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5461 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA);
5462
5463 tmp = I915_READ(DSPFW3);
5464 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5465
5466 if (IS_CHERRYVIEW(dev_priv)) {
5467 tmp = I915_READ(DSPFW7_CHV);
5468 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5469 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5470
5471 tmp = I915_READ(DSPFW8_CHV);
5472 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF);
5473 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE);
5474
5475 tmp = I915_READ(DSPFW9_CHV);
5476 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC);
5477 wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC);
5478
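/*
 * DSPHOWM supplies one extra high-order bit per watermark: it is
 * merged in as bit 8 on top of the 8-bit primary/sprite fields read
 * above, and as bit 9 for the SR plane watermark, whose low field
 * occupies nine bits.
 */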
5479 tmp = I915_READ(DSPHOWM);
5480 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5481 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
5482 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
5483 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEC_HI) << 8;
5484 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5485 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5486 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5487 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5488 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5489 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5490 } else {
5491 tmp = I915_READ(DSPFW7);
5492 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5493 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5494
5495 tmp = I915_READ(DSPHOWM);
5496 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5497 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5498 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5499 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5500 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5501 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5502 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5503 }
5504 }
5505
5506 #undef _FW_WM
5507 #undef _FW_WM_VLV
5508
5509 void g4x_wm_get_hw_state(struct drm_device *dev)
5510 {
5511 struct drm_i915_private *dev_priv = to_i915(dev);
5512 struct g4x_wm_values *wm = &dev_priv->wm.g4x;
5513 struct intel_crtc *crtc;
5514
5515 g4x_read_wm_values(dev_priv, wm);
5516
5517 wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5518
5519 for_each_intel_crtc(dev, crtc) {
5520 struct intel_crtc_state *crtc_state =
5521 to_intel_crtc_state(crtc->base.state);
5522 struct g4x_wm_state *active = &crtc->wm.active.g4x;
5523 struct g4x_pipe_wm *raw;
5524 enum pipe pipe = crtc->pipe;
5525 enum plane_id plane_id;
5526 int level, max_level;
5527
5528 active->cxsr = wm->cxsr;
5529 active->hpll_en = wm->hpll_en;
5530 active->fbc_en = wm->fbc_en;
5531
5532 active->sr = wm->sr;
5533 active->hpll = wm->hpll;
5534
5535 for_each_plane_id_on_crtc(crtc, plane_id) {
5536 active->wm.plane[plane_id] =
5537 wm->pipe[pipe].plane[plane_id];
5538 }
5539
5540 if (wm->cxsr && wm->hpll_en)
5541 max_level = G4X_WM_LEVEL_HPLL;
5542 else if (wm->cxsr)
5543 max_level = G4X_WM_LEVEL_SR;
5544 else
5545 max_level = G4X_WM_LEVEL_NORMAL;
5546
5547 level = G4X_WM_LEVEL_NORMAL;
5548 raw = &crtc_state->wm.g4x.raw[level];
5549 for_each_plane_id_on_crtc(crtc, plane_id)
5550 raw->plane[plane_id] = active->wm.plane[plane_id];
5551
5552 if (++level > max_level)
5553 goto out;
5554
5555 raw = &crtc_state->wm.g4x.raw[level];
5556 raw->plane[PLANE_PRIMARY] = active->sr.plane;
5557 raw->plane[PLANE_CURSOR] = active->sr.cursor;
5558 raw->plane[PLANE_SPRITE0] = 0;
5559 raw->fbc = active->sr.fbc;
5560
5561 if (++level > max_level)
5562 goto out;
5563
5564 raw = &crtc_state->wm.g4x.raw[level];
5565 raw->plane[PLANE_PRIMARY] = active->hpll.plane;
5566 raw->plane[PLANE_CURSOR] = active->hpll.cursor;
5567 raw->plane[PLANE_SPRITE0] = 0;
5568 raw->fbc = active->hpll.fbc;
5569
5570 out:
5571 for_each_plane_id_on_crtc(crtc, plane_id)
5572 g4x_raw_plane_wm_set(crtc_state, level,
5573 plane_id, USHRT_MAX);
5574 g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
5575
5576 crtc_state->wm.g4x.optimal = *active;
5577 crtc_state->wm.g4x.intermediate = *active;
5578
5579 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
5580 pipe_name(pipe),
5581 wm->pipe[pipe].plane[PLANE_PRIMARY],
5582 wm->pipe[pipe].plane[PLANE_CURSOR],
5583 wm->pipe[pipe].plane[PLANE_SPRITE0]);
5584 }
5585
5586 DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
5587 wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
5588 DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
5589 wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
5590 DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
5591 yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
5592 }
5593
5594 void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
5595 {
5596 struct intel_plane *plane;
5597 struct intel_crtc *crtc;
5598
5599 mutex_lock(&dev_priv->wm.wm_mutex);
5600
5601 for_each_intel_plane(&dev_priv->drm, plane) {
5602 struct intel_crtc *crtc =
5603 intel_get_crtc_for_pipe(dev_priv, plane->pipe);
5604 struct intel_crtc_state *crtc_state =
5605 to_intel_crtc_state(crtc->base.state);
5606 struct intel_plane_state *plane_state =
5607 to_intel_plane_state(plane->base.state);
5608 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
5609 enum plane_id plane_id = plane->id;
5610 int level;
5611
5612 if (plane_state->base.visible)
5613 continue;
5614
5615 for (level = 0; level < 3; level++) {
5616 struct g4x_pipe_wm *raw =
5617 &crtc_state->wm.g4x.raw[level];
5618
5619 raw->plane[plane_id] = 0;
5620 wm_state->wm.plane[plane_id] = 0;
5621 }
5622
5623 if (plane_id == PLANE_PRIMARY) {
5624 for (level = 0; level < 3; level++) {
5625 struct g4x_pipe_wm *raw =
5626 &crtc_state->wm.g4x.raw[level];
5627 raw->fbc = 0;
5628 }
5629
5630 wm_state->sr.fbc = 0;
5631 wm_state->hpll.fbc = 0;
5632 wm_state->fbc_en = false;
5633 }
5634 }
5635
5636 for_each_intel_crtc(&dev_priv->drm, crtc) {
5637 struct intel_crtc_state *crtc_state =
5638 to_intel_crtc_state(crtc->base.state);
5639
5640 crtc_state->wm.g4x.intermediate =
5641 crtc_state->wm.g4x.optimal;
5642 crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
5643 }
5644
5645 g4x_program_watermarks(dev_priv);
5646
5647 mutex_unlock(&dev_priv->wm.wm_mutex);
5648 }
5649
5650 void vlv_wm_get_hw_state(struct drm_device *dev)
5651 {
5652 struct drm_i915_private *dev_priv = to_i915(dev);
5653 struct vlv_wm_values *wm = &dev_priv->wm.vlv;
5654 struct intel_crtc *crtc;
5655 u32 val;
5656
5657 vlv_read_wm_values(dev_priv, wm);
5658
5659 wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
5660 wm->level = VLV_WM_LEVEL_PM2;
5661
5662 if (IS_CHERRYVIEW(dev_priv)) {
5663 mutex_lock(&dev_priv->pcu_lock);
5664
5665 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
5666 if (val & DSP_MAXFIFO_PM5_ENABLE)
5667 wm->level = VLV_WM_LEVEL_PM5;
5668
5669 /*
5670 * If DDR DVFS is disabled in the BIOS, Punit
5671 * will never ack the request. So if that happens
5672 * assume we don't have to enable/disable DDR DVFS
5673 * dynamically. To test that just set the REQ_ACK
5674 * bit to poke the Punit, but don't change the
5675 * HIGH/LOW bits so that we don't actually change
5676 * the current state.
5677 */
5678 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
5679 val |= FORCE_DDR_FREQ_REQ_ACK;
5680 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
5681
5682 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
5683 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
5684 DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
5685 "assuming DDR DVFS is disabled\n");
5686 dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
5687 } else {
5688 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
5689 if ((val & FORCE_DDR_HIGH_FREQ) == 0)
5690 wm->level = VLV_WM_LEVEL_DDR_DVFS;
5691 }
5692
5693 mutex_unlock(&dev_priv->pcu_lock);
5694 }
5695
5696 for_each_intel_crtc(dev, crtc) {
5697 struct intel_crtc_state *crtc_state =
5698 to_intel_crtc_state(crtc->base.state);
5699 struct vlv_wm_state *active = &crtc->wm.active.vlv;
5700 const struct vlv_fifo_state *fifo_state =
5701 &crtc_state->wm.vlv.fifo_state;
5702 enum pipe pipe = crtc->pipe;
5703 enum plane_id plane_id;
5704 int level;
5705
5706 vlv_get_fifo_size(crtc_state);
5707
5708 active->num_levels = wm->level + 1;
5709 active->cxsr = wm->cxsr;
5710
5711 for (level = 0; level < active->num_levels; level++) {
5712 struct g4x_pipe_wm *raw =
5713 &crtc_state->wm.vlv.raw[level];
5714
5715 active->sr[level].plane = wm->sr.plane;
5716 active->sr[level].cursor = wm->sr.cursor;
5717
5718 for_each_plane_id_on_crtc(crtc, plane_id) {
5719 active->wm[level].plane[plane_id] =
5720 wm->pipe[pipe].plane[plane_id];
5721
5722 raw->plane[plane_id] =
5723 vlv_invert_wm_value(active->wm[level].plane[plane_id],
5724 fifo_state->plane[plane_id]);
5725 }
5726 }
5727
5728 for_each_plane_id_on_crtc(crtc, plane_id)
5729 vlv_raw_plane_wm_set(crtc_state, level,
5730 plane_id, USHRT_MAX);
5731 vlv_invalidate_wms(crtc, active, level);
5732
5733 crtc_state->wm.vlv.optimal = *active;
5734 crtc_state->wm.vlv.intermediate = *active;
5735
5736 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
5737 pipe_name(pipe),
5738 wm->pipe[pipe].plane[PLANE_PRIMARY],
5739 wm->pipe[pipe].plane[PLANE_CURSOR],
5740 wm->pipe[pipe].plane[PLANE_SPRITE0],
5741 wm->pipe[pipe].plane[PLANE_SPRITE1]);
5742 }
5743
5744 DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
5745 wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
5746 }
5747
5748 void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
5749 {
5750 struct intel_plane *plane;
5751 struct intel_crtc *crtc;
5752
5753 mutex_lock(&dev_priv->wm.wm_mutex);
5754
5755 for_each_intel_plane(&dev_priv->drm, plane) {
5756 struct intel_crtc *crtc =
5757 intel_get_crtc_for_pipe(dev_priv, plane->pipe);
5758 struct intel_crtc_state *crtc_state =
5759 to_intel_crtc_state(crtc->base.state);
5760 struct intel_plane_state *plane_state =
5761 to_intel_plane_state(plane->base.state);
5762 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
5763 const struct vlv_fifo_state *fifo_state =
5764 &crtc_state->wm.vlv.fifo_state;
5765 enum plane_id plane_id = plane->id;
5766 int level;
5767
5768 if (plane_state->base.visible)
5769 continue;
5770
5771 for (level = 0; level < wm_state->num_levels; level++) {
5772 struct g4x_pipe_wm *raw =
5773 &crtc_state->wm.vlv.raw[level];
5774
5775 raw->plane[plane_id] = 0;
5776
5777 wm_state->wm[level].plane[plane_id] =
5778 vlv_invert_wm_value(raw->plane[plane_id],
5779 fifo_state->plane[plane_id]);
5780 }
5781 }
5782
5783 for_each_intel_crtc(&dev_priv->drm, crtc) {
5784 struct intel_crtc_state *crtc_state =
5785 to_intel_crtc_state(crtc->base.state);
5786
5787 crtc_state->wm.vlv.intermediate =
5788 crtc_state->wm.vlv.optimal;
5789 crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
5790 }
5791
5792 vlv_program_watermarks(dev_priv);
5793
5794 mutex_unlock(&dev_priv->wm.wm_mutex);
5795 }
5796
5797 /*
5798 * FIXME should probably kill this and improve
5799 * the real watermark readout/sanitation instead
5800 */
5801 static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
5802 {
5803 I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
5804 I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
5805 I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
5806
5807 /*
5808 * Don't touch WM1S_LP_EN here.
5809 * Doing so could cause underruns.
5810 */
5811 }
5812
5813 void ilk_wm_get_hw_state(struct drm_device *dev)
5814 {
5815 struct drm_i915_private *dev_priv = to_i915(dev);
5816 struct ilk_wm_values *hw = &dev_priv->wm.hw;
5817 struct drm_crtc *crtc;
5818
5819 ilk_init_lp_watermarks(dev_priv);
5820
5821 for_each_crtc(dev, crtc)
5822 ilk_pipe_wm_get_hw_state(crtc);
5823
5824 hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
5825 hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
5826 hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
5827
5828 hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
5829 if (INTEL_GEN(dev_priv) >= 7) {
5830 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
5831 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
5832 }
5833
5834 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
5835 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
5836 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
5837 else if (IS_IVYBRIDGE(dev_priv))
5838 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
5839 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
5840
5841 hw->enable_fbc_wm =
5842 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
5843 }
5844
5845 /**
5846 * intel_update_watermarks - update FIFO watermark values based on current modes
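* @crtc: the CRTC whose watermarks need to be updated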
5847 *
5848 * Calculate watermark values for the various WM regs based on current mode
5849 * and plane configuration.
5850 *
5851 * There are several cases to deal with here:
5852 * - normal (i.e. non-self-refresh)
5853 * - self-refresh (SR) mode
5854 * - lines are large relative to FIFO size (buffer can hold up to 2)
5855 * - lines are small relative to FIFO size (buffer can hold more than 2
5856 * lines), so need to account for TLB latency
5857 *
5858 * The normal calculation is:
5859 * watermark = dotclock * bytes per pixel * latency
5860 * where latency is platform & configuration dependent (we assume pessimal
5861 * values here).
5862 *
5863 * The SR calculation is:
5864 * watermark = (trunc(latency/line time)+1) * surface width *
5865 * bytes per pixel
5866 * where
5867 * line time = htotal / dotclock
5868 * surface width = hdisplay for normal plane and 64 for cursor
5869 * and latency is assumed to be high, as above.
5870 *
5871 * The final value programmed to the register should always be rounded up,
5872 * and include an extra 2 entries to account for clock crossings.
5873 *
5874 * We don't use the sprite, so we can ignore that. And on Crestline we have
5875 * to set the non-SR watermarks to 8.
5876 */
5877 void intel_update_watermarks(struct intel_crtc *crtc)
5878 {
5879 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5880
5881 if (dev_priv->display.update_wm)
5882 dev_priv->display.update_wm(crtc);
5883 }
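
/*
 * Purely illustrative example of the SR formula above, with assumed
 * numbers rather than values from any platform table: with htotal =
 * 2200 and a 148.5 MHz dotclock the line time is ~14.8 us; an assumed
 * SR latency of 30 us gives trunc(30 / 14.8) + 1 = 3 lines, so a
 * 1920-wide plane at 4 bytes per pixel needs roughly 3 * 1920 * 4 =
 * 23040 bytes, which is then converted to FIFO entries, padded by 2
 * for clock crossings and rounded up before being programmed.
 */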
5884
5885 void intel_enable_ipc(struct drm_i915_private *dev_priv)
5886 {
5887 u32 val;
5888
5889 /* Display WA #0477 WaDisableIPC: skl */
5890 if (IS_SKYLAKE(dev_priv)) {
5891 dev_priv->ipc_enabled = false;
5892 return;
5893 }
5894
5895 val = I915_READ(DISP_ARB_CTL2);
5896
5897 if (dev_priv->ipc_enabled)
5898 val |= DISP_IPC_ENABLE;
5899 else
5900 val &= ~DISP_IPC_ENABLE;
5901
5902 I915_WRITE(DISP_ARB_CTL2, val);
5903 }
5904
5905 void intel_init_ipc(struct drm_i915_private *dev_priv)
5906 {
5907 dev_priv->ipc_enabled = false;
5908 if (!HAS_IPC(dev_priv))
5909 return;
5910
5911 dev_priv->ipc_enabled = true;
5912 intel_enable_ipc(dev_priv);
5913 }
5914
5915 /*
5916 * Lock protecting IPS related data structures
5917 */
5918 DEFINE_SPINLOCK(mchdev_lock);
5919
5920 /* Global for IPS driver to get at the current i915 device. Protected by
5921 * mchdev_lock. */
5922 static struct drm_i915_private *i915_mch_dev;
5923
5924 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
5925 {
5926 u16 rgvswctl;
5927
5928 lockdep_assert_held(&mchdev_lock);
5929
5930 rgvswctl = I915_READ16(MEMSWCTL);
5931 if (rgvswctl & MEMCTL_CMD_STS) {
5932 DRM_DEBUG("gpu busy, RCS change rejected\n");
5933 return false; /* still busy with another command */
5934 }
5935
5936 rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
5937 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
5938 I915_WRITE16(MEMSWCTL, rgvswctl);
5939 POSTING_READ16(MEMSWCTL);
5940
5941 rgvswctl |= MEMCTL_CMD_STS;
5942 I915_WRITE16(MEMSWCTL, rgvswctl);
5943
5944 return true;
5945 }
5946
5947 static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
5948 {
5949 u32 rgvmodectl;
5950 u8 fmax, fmin, fstart, vstart;
5951
5952 spin_lock_irq(&mchdev_lock);
5953
5954 rgvmodectl = I915_READ(MEMMODECTL);
5955
5956 /* Enable temp reporting */
5957 I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
5958 I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
5959
5960 /* 100ms RC evaluation intervals */
5961 I915_WRITE(RCUPEI, 100000);
5962 I915_WRITE(RCDNEI, 100000);
5963
5964 /* Set max/min thresholds to 90ms and 80ms respectively */
5965 I915_WRITE(RCBMAXAVG, 90000);
5966 I915_WRITE(RCBMINAVG, 80000);
5967
5968 I915_WRITE(MEMIHYST, 1);
5969
5970 /* Set up min, max, and cur for interrupt handling */
5971 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
5972 fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
5973 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
5974 MEMMODE_FSTART_SHIFT;
5975
5976 vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
5977 PXVFREQ_PX_SHIFT;
5978
5979 dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
5980 dev_priv->ips.fstart = fstart;
5981
5982 dev_priv->ips.max_delay = fstart;
5983 dev_priv->ips.min_delay = fmin;
5984 dev_priv->ips.cur_delay = fstart;
5985
5986 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
5987 fmax, fmin, fstart);
5988
5989 I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
5990
5991 /*
5992 * Interrupts will be enabled in ironlake_irq_postinstall
5993 */
5994
5995 I915_WRITE(VIDSTART, vstart);
5996 POSTING_READ(VIDSTART);
5997
5998 rgvmodectl |= MEMMODE_SWMODE_EN;
5999 I915_WRITE(MEMMODECTL, rgvmodectl);
6000
6001 if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
6002 DRM_ERROR("stuck trying to change perf mode\n");
6003 mdelay(1);
6004
6005 ironlake_set_drps(dev_priv, fstart);
6006
6007 dev_priv->ips.last_count1 = I915_READ(DMIEC) +
6008 I915_READ(DDREC) + I915_READ(CSIEC);
6009 dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
6010 dev_priv->ips.last_count2 = I915_READ(GFXEC);
6011 dev_priv->ips.last_time2 = ktime_get_raw_ns();
6012
6013 spin_unlock_irq(&mchdev_lock);
6014 }
6015
6016 static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
6017 {
6018 u16 rgvswctl;
6019
6020 spin_lock_irq(&mchdev_lock);
6021
6022 rgvswctl = I915_READ16(MEMSWCTL);
6023
6024 /* Ack interrupts, disable EFC interrupt */
6025 I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
6026 I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
6027 I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
6028 I915_WRITE(DEIIR, DE_PCU_EVENT);
6029 I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
6030
6031 /* Go back to the starting frequency */
6032 ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
6033 mdelay(1);
6034 rgvswctl |= MEMCTL_CMD_STS;
6035 I915_WRITE(MEMSWCTL, rgvswctl);
6036 mdelay(1);
6037
6038 spin_unlock_irq(&mchdev_lock);
6039 }
6040
6041 /* There's a funny hw issue where the hw returns all 0 when reading from
6042 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
6043 * ourselves, instead of doing a rmw cycle (which might result in us clearing
6044 * all limits and the gpu getting stuck at whatever frequency it is at atm).
6045 */
6046 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
6047 {
6048 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6049 u32 limits;
6050
6051 /* Only set the down limit when we've reached the lowest level to avoid
6052 * getting more interrupts, otherwise leave this clear. This prevents a
6053 * race in the hw when coming out of rc6: There's a tiny window where
6054 * the hw runs at the minimal clock before selecting the desired
6055 * frequency, if the down threshold expires in that window we will not
6056 * receive a down interrupt. */
6057 if (INTEL_GEN(dev_priv) >= 9) {
6058 limits = (rps->max_freq_softlimit) << 23;
6059 if (val <= rps->min_freq_softlimit)
6060 limits |= (rps->min_freq_softlimit) << 14;
6061 } else {
6062 limits = rps->max_freq_softlimit << 24;
6063 if (val <= rps->min_freq_softlimit)
6064 limits |= rps->min_freq_softlimit << 16;
6065 }
6066
6067 return limits;
6068 }
6069
6070 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
6071 {
6072 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6073 int new_power;
6074 u32 threshold_up = 0, threshold_down = 0; /* in % */
6075 u32 ei_up = 0, ei_down = 0;
6076
6077 new_power = rps->power;
6078 switch (rps->power) {
6079 case LOW_POWER:
6080 if (val > rps->efficient_freq + 1 &&
6081 val > rps->cur_freq)
6082 new_power = BETWEEN;
6083 break;
6084
6085 case BETWEEN:
6086 if (val <= rps->efficient_freq &&
6087 val < rps->cur_freq)
6088 new_power = LOW_POWER;
6089 else if (val >= rps->rp0_freq &&
6090 val > rps->cur_freq)
6091 new_power = HIGH_POWER;
6092 break;
6093
6094 case HIGH_POWER:
6095 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
6096 val < rps->cur_freq)
6097 new_power = BETWEEN;
6098 break;
6099 }
6100 /* Max/min bins are special */
6101 if (val <= rps->min_freq_softlimit)
6102 new_power = LOW_POWER;
6103 if (val >= rps->max_freq_softlimit)
6104 new_power = HIGH_POWER;
6105 if (new_power == rps->power)
6106 return;
6107
6108 /* Note the units here are not exactly 1us, but 1280ns. */
6109 switch (new_power) {
6110 case LOW_POWER:
6111 /* Upclock if more than 95% busy over 16ms */
6112 ei_up = 16000;
6113 threshold_up = 95;
6114
6115 /* Downclock if less than 85% busy over 32ms */
6116 ei_down = 32000;
6117 threshold_down = 85;
6118 break;
6119
6120 case BETWEEN:
6121 /* Upclock if more than 90% busy over 13ms */
6122 ei_up = 13000;
6123 threshold_up = 90;
6124
6125 /* Downclock if less than 75% busy over 32ms */
6126 ei_down = 32000;
6127 threshold_down = 75;
6128 break;
6129
6130 case HIGH_POWER:
6131 /* Upclock if more than 85% busy over 10ms */
6132 ei_up = 10000;
6133 threshold_up = 85;
6134
6135 /* Downclock if less than 60% busy over 32ms */
6136 ei_down = 32000;
6137 threshold_down = 60;
6138 break;
6139 }
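/*
 * Illustrative conversion, assuming a platform where the evaluation
 * interval unit is the 1280 ns tick noted above: ei_up = 16000 us is
 * 16000000 ns / 1280 ns = 12500 interval ticks; the
 * GT_INTERVAL_FROM_US() calls below perform this scaling using the
 * unit of the running platform.
 */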
6140
6141 /* When BYT can survive dynamic sw freq adjustments without
6142 * hanging the system, this restriction can be lifted.
6143 */
6144 if (IS_VALLEYVIEW(dev_priv))
6145 goto skip_hw_write;
6146
6147 I915_WRITE(GEN6_RP_UP_EI,
6148 GT_INTERVAL_FROM_US(dev_priv, ei_up));
6149 I915_WRITE(GEN6_RP_UP_THRESHOLD,
6150 GT_INTERVAL_FROM_US(dev_priv,
6151 ei_up * threshold_up / 100));
6152
6153 I915_WRITE(GEN6_RP_DOWN_EI,
6154 GT_INTERVAL_FROM_US(dev_priv, ei_down));
6155 I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
6156 GT_INTERVAL_FROM_US(dev_priv,
6157 ei_down * threshold_down / 100));
6158
6159 I915_WRITE(GEN6_RP_CONTROL,
6160 GEN6_RP_MEDIA_TURBO |
6161 GEN6_RP_MEDIA_HW_NORMAL_MODE |
6162 GEN6_RP_MEDIA_IS_GFX |
6163 GEN6_RP_ENABLE |
6164 GEN6_RP_UP_BUSY_AVG |
6165 GEN6_RP_DOWN_IDLE_AVG);
6166
6167 skip_hw_write:
6168 rps->power = new_power;
6169 rps->up_threshold = threshold_up;
6170 rps->down_threshold = threshold_down;
6171 rps->last_adj = 0;
6172 }
6173
6174 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
6175 {
6176 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6177 u32 mask = 0;
6178
6179 /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
6180 if (val > rps->min_freq_softlimit)
6181 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
6182 if (val < rps->max_freq_softlimit)
6183 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
6184
6185 mask &= dev_priv->pm_rps_events;
6186
6187 return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
6188 }
6189
6190 /* gen6_set_rps is called to update the frequency request, but should also be
6191 * called when the range (min_delay and max_delay) is modified so that we can
6192 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
6193 static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
6194 {
6195 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6196
6197 /* min/max delay may still have been modified so be sure to
6198 * write the limits value.
6199 */
6200 if (val != rps->cur_freq) {
6201 gen6_set_rps_thresholds(dev_priv, val);
6202
6203 if (INTEL_GEN(dev_priv) >= 9)
6204 I915_WRITE(GEN6_RPNSWREQ,
6205 GEN9_FREQUENCY(val));
6206 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6207 I915_WRITE(GEN6_RPNSWREQ,
6208 HSW_FREQUENCY(val));
6209 else
6210 I915_WRITE(GEN6_RPNSWREQ,
6211 GEN6_FREQUENCY(val) |
6212 GEN6_OFFSET(0) |
6213 GEN6_AGGRESSIVE_TURBO);
6214 }
6215
6216 /* Make sure we continue to get interrupts
6217 * until we hit the minimum or maximum frequencies.
6218 */
6219 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
6220 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6221
6222 rps->cur_freq = val;
6223 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6224
6225 return 0;
6226 }
6227
6228 static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
6229 {
6230 int err;
6231
6232 if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
6233 "Odd GPU freq value\n"))
6234 val &= ~1;
6235
6236 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6237
6238 if (val != dev_priv->gt_pm.rps.cur_freq) {
6239 err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
6240 if (err)
6241 return err;
6242
6243 gen6_set_rps_thresholds(dev_priv, val);
6244 }
6245
6246 dev_priv->gt_pm.rps.cur_freq = val;
6247 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6248
6249 return 0;
6250 }
6251
6252 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
6253 *
6254 * If Gfx is Idle, then
6255 * 1. Forcewake Media well.
6256 * 2. Request idle freq.
6257 * 3. Release Forcewake of Media well.
6258 */
6259 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
6260 {
6261 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6262 u32 val = rps->idle_freq;
6263 int err;
6264
6265 if (rps->cur_freq <= val)
6266 return;
6267
6268 /* The punit delays the write of the frequency and voltage until it
6269 * determines the GPU is awake. During normal usage we don't want to
6270 * waste power changing the frequency if the GPU is sleeping (rc6).
6271 * However, the GPU and driver are now idle and we do not want to delay
6272 * switching to minimum voltage (reducing power whilst idle) as we do
6273 * not expect to be woken in the near future and so must flush the
6274 * change by waking the device.
6275 *
6276 * We choose to take the media powerwell (either would do to trick the
6277 * punit into committing the voltage change) as that takes a lot less
6278 * power than the render powerwell.
6279 */
6280 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
6281 err = valleyview_set_rps(dev_priv, val);
6282 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
6283
6284 if (err)
6285 DRM_ERROR("Failed to set RPS for idle\n");
6286 }
6287
6288 void gen6_rps_busy(struct drm_i915_private *dev_priv)
6289 {
6290 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6291
6292 mutex_lock(&dev_priv->pcu_lock);
6293 if (rps->enabled) {
6294 u8 freq;
6295
6296 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
6297 gen6_rps_reset_ei(dev_priv);
6298 I915_WRITE(GEN6_PMINTRMSK,
6299 gen6_rps_pm_mask(dev_priv, rps->cur_freq));
6300
6301 gen6_enable_rps_interrupts(dev_priv);
6302
6303 /* Use the user's desired frequency as a guide, but for better
6304 * performance, jump directly to RPe as our starting frequency.
6305 */
6306 freq = max(rps->cur_freq,
6307 rps->efficient_freq);
6308
6309 if (intel_set_rps(dev_priv,
6310 clamp(freq,
6311 rps->min_freq_softlimit,
6312 rps->max_freq_softlimit)))
6313 DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
6314 }
6315 mutex_unlock(&dev_priv->pcu_lock);
6316 }
6317
6318 void gen6_rps_idle(struct drm_i915_private *dev_priv)
6319 {
6320 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6321
6322 /* Flush our bottom-half so that it does not race with us
6323 * setting the idle frequency and so that it is bounded by
6324 * our rpm wakeref. And then disable the interrupts to stop any
6325 * further RPS reclocking whilst we are asleep.
6326 */
6327 gen6_disable_rps_interrupts(dev_priv);
6328
6329 mutex_lock(&dev_priv->pcu_lock);
6330 if (rps->enabled) {
6331 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6332 vlv_set_rps_idle(dev_priv);
6333 else
6334 gen6_set_rps(dev_priv, rps->idle_freq);
6335 rps->last_adj = 0;
6336 I915_WRITE(GEN6_PMINTRMSK,
6337 gen6_sanitize_rps_pm_mask(dev_priv, ~0));
6338 }
6339 mutex_unlock(&dev_priv->pcu_lock);
6340 }
6341
6342 void gen6_rps_boost(struct drm_i915_gem_request *rq,
6343 struct intel_rps_client *rps_client)
6344 {
6345 struct intel_rps *rps = &rq->i915->gt_pm.rps;
6346 unsigned long flags;
6347 bool boost;
6348
6349 /* This is intentionally racy! We peek at the state here, then
6350 * validate inside the RPS worker.
6351 */
6352 if (!rps->enabled)
6353 return;
6354
6355 boost = false;
6356 spin_lock_irqsave(&rq->lock, flags);
6357 if (!rq->waitboost && !i915_gem_request_completed(rq)) {
6358 atomic_inc(&rps->num_waiters);
6359 rq->waitboost = true;
6360 boost = true;
6361 }
6362 spin_unlock_irqrestore(&rq->lock, flags);
6363 if (!boost)
6364 return;
6365
6366 if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
6367 schedule_work(&rps->work);
6368
6369 atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
6370 }
6371
6372 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
6373 {
6374 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6375 int err;
6376
6377 lockdep_assert_held(&dev_priv->pcu_lock);
6378 GEM_BUG_ON(val > rps->max_freq);
6379 GEM_BUG_ON(val < rps->min_freq);
6380
6381 if (!rps->enabled) {
6382 rps->cur_freq = val;
6383 return 0;
6384 }
6385
6386 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6387 err = valleyview_set_rps(dev_priv, val);
6388 else
6389 err = gen6_set_rps(dev_priv, val);
6390
6391 return err;
6392 }
6393
6394 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
6395 {
6396 I915_WRITE(GEN6_RC_CONTROL, 0);
6397 I915_WRITE(GEN9_PG_ENABLE, 0);
6398 }
6399
6400 static void gen9_disable_rps(struct drm_i915_private *dev_priv)
6401 {
6402 I915_WRITE(GEN6_RP_CONTROL, 0);
6403 }
6404
6405 static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
6406 {
6407 I915_WRITE(GEN6_RC_CONTROL, 0);
6408 }
6409
6410 static void gen6_disable_rps(struct drm_i915_private *dev_priv)
6411 {
6412 I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
6413 I915_WRITE(GEN6_RP_CONTROL, 0);
6414 }
6415
6416 static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
6417 {
6418 I915_WRITE(GEN6_RC_CONTROL, 0);
6419 }
6420
6421 static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
6422 {
6423 I915_WRITE(GEN6_RP_CONTROL, 0);
6424 }
6425
6426 static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
6427 {
6428 /* We're doing forcewake before disabling RC6;
6429 * this is what the BIOS expects when going into suspend */
6430 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6431
6432 I915_WRITE(GEN6_RC_CONTROL, 0);
6433
6434 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6435 }
6436
6437 static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
6438 {
6439 I915_WRITE(GEN6_RP_CONTROL, 0);
6440 }
6441
6442 static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode)
6443 {
6444 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
6445 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))
6446 mode = GEN6_RC_CTL_RC6_ENABLE;
6447 else
6448 mode = 0;
6449 }
6450 if (HAS_RC6p(dev_priv))
6451 DRM_DEBUG_DRIVER("Enabling RC6 states: "
6452 "RC6 %s RC6p %s RC6pp %s\n",
6453 onoff(mode & GEN6_RC_CTL_RC6_ENABLE),
6454 onoff(mode & GEN6_RC_CTL_RC6p_ENABLE),
6455 onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE));
6456
6457 else
6458 DRM_DEBUG_DRIVER("Enabling RC6 states: RC6 %s\n",
6459 onoff(mode & GEN6_RC_CTL_RC6_ENABLE));
6460 }
6461
6462 static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
6463 {
6464 struct i915_ggtt *ggtt = &dev_priv->ggtt;
6465 bool enable_rc6 = true;
6466 unsigned long rc6_ctx_base;
6467 u32 rc_ctl;
6468 int rc_sw_target;
6469
6470 rc_ctl = I915_READ(GEN6_RC_CONTROL);
6471 rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
6472 RC_SW_TARGET_STATE_SHIFT;
6473 DRM_DEBUG_DRIVER("BIOS enabled RC states: "
6474 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
6475 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
6476 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
6477 rc_sw_target);
6478
6479 if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
6480 DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
6481 enable_rc6 = false;
6482 }
6483
6484 /*
6485 * The exact context size is not known for BXT, so assume a page size
6486 * for this check.
6487 */
6488 rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
6489 if (!((rc6_ctx_base >= ggtt->stolen_reserved_base) &&
6490 (rc6_ctx_base + PAGE_SIZE <= ggtt->stolen_reserved_base +
6491 ggtt->stolen_reserved_size))) {
6492 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
6493 enable_rc6 = false;
6494 }
6495
6496 if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
6497 ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
6498 ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
6499 ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
6500 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
6501 enable_rc6 = false;
6502 }
6503
6504 if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
6505 !I915_READ(GEN8_PUSHBUS_ENABLE) ||
6506 !I915_READ(GEN8_PUSHBUS_SHIFT)) {
6507 DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
6508 enable_rc6 = false;
6509 }
6510
6511 if (!I915_READ(GEN6_GFXPAUSE)) {
6512 DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
6513 enable_rc6 = false;
6514 }
6515
6516 if (!I915_READ(GEN8_MISC_CTRL0)) {
6517 DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
6518 enable_rc6 = false;
6519 }
6520
6521 return enable_rc6;
6522 }
6523
6524 int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6)
6525 {
6526 /* No RC6 before Ironlake and code is gone for ilk. */
6527 if (INTEL_INFO(dev_priv)->gen < 6)
6528 return 0;
6529
6530 if (!enable_rc6)
6531 return 0;
6532
6533 if (IS_GEN9_LP(dev_priv) && !bxt_check_bios_rc6_setup(dev_priv)) {
6534 DRM_INFO("RC6 disabled by BIOS\n");
6535 return 0;
6536 }
6537
6538 /* Respect the kernel parameter if it is set */
6539 if (enable_rc6 >= 0) {
6540 int mask;
6541
6542 if (HAS_RC6p(dev_priv))
6543 mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
6544 INTEL_RC6pp_ENABLE;
6545 else
6546 mask = INTEL_RC6_ENABLE;
6547
6548 if ((enable_rc6 & mask) != enable_rc6)
6549 DRM_DEBUG_DRIVER("Adjusting RC6 mask to %d "
6550 "(requested %d, valid %d)\n",
6551 enable_rc6 & mask, enable_rc6, mask);
6552
6553 return enable_rc6 & mask;
6554 }
6555
6556 if (IS_IVYBRIDGE(dev_priv))
6557 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
6558
6559 return INTEL_RC6_ENABLE;
6560 }
6561
6562 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
6563 {
6564 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6565
6566 /* All of these values are in units of 50MHz */
6567
6568 /* static values from HW: RP0 > RP1 > RPn (min_freq) */
6569 if (IS_GEN9_LP(dev_priv)) {
6570 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
6571 rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
6572 rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
6573 rps->min_freq = (rp_state_cap >> 0) & 0xff;
6574 } else {
6575 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
6576 rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
6577 rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
6578 rps->min_freq = (rp_state_cap >> 16) & 0xff;
6579 }
6580 /* hw_max = RP0 until we check for overclocking */
6581 rps->max_freq = rps->rp0_freq;
6582
6583 rps->efficient_freq = rps->rp1_freq;
6584 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
6585 IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
6586 u32 ddcc_status = 0;
6587
6588 if (sandybridge_pcode_read(dev_priv,
6589 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
6590 &ddcc_status) == 0)
6591 rps->efficient_freq =
6592 clamp_t(u8,
6593 ((ddcc_status >> 8) & 0xff),
6594 rps->min_freq,
6595 rps->max_freq);
6596 }
6597
6598 if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
6599 /* Store the frequency values in 16.66 MHz units, which is
6600 * the natural hardware unit for SKL
6601 */
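/*
 * For example (values assumed purely for illustration): a fused RP0
 * of 22 in 50 MHz units (1100 MHz) becomes 22 * GEN9_FREQ_SCALER = 66
 * in 16.66 MHz units, still describing the same 1100 MHz.
 */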
6602 rps->rp0_freq *= GEN9_FREQ_SCALER;
6603 rps->rp1_freq *= GEN9_FREQ_SCALER;
6604 rps->min_freq *= GEN9_FREQ_SCALER;
6605 rps->max_freq *= GEN9_FREQ_SCALER;
6606 rps->efficient_freq *= GEN9_FREQ_SCALER;
6607 }
6608 }
6609
6610 static void reset_rps(struct drm_i915_private *dev_priv,
6611 int (*set)(struct drm_i915_private *, u8))
6612 {
6613 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6614 u8 freq = rps->cur_freq;
6615
6616 /* force a reset */
6617 rps->power = -1;
6618 rps->cur_freq = -1;
6619
6620 if (set(dev_priv, freq))
6621 DRM_ERROR("Failed to reset RPS to initial values\n");
6622 }
6623
6624 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
6625 static void gen9_enable_rps(struct drm_i915_private *dev_priv)
6626 {
6627 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6628
6629 /* Program defaults and thresholds for RPS*/
6630 I915_WRITE(GEN6_RC_VIDEO_FREQ,
6631 GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
6632
6633 /* 1 second timeout*/
6634 I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
6635 GT_INTERVAL_FROM_US(dev_priv, 1000000));
6636
6637 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
6638
6639 /* Leaning on the below call to gen6_set_rps to program/setup the
6640 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
6641 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
6642 reset_rps(dev_priv, gen6_set_rps);
6643
6644 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6645 }
6646
6647 static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
6648 {
6649 struct intel_engine_cs *engine;
6650 enum intel_engine_id id;
6651 u32 rc6_mode, rc6_mask = 0;
6652
6653 /* 1a: Software RC state - RC0 */
6654 I915_WRITE(GEN6_RC_STATE, 0);
6655
6656 /* 1b: Get forcewake during program sequence. Although the driver
6657 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
6658 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6659
6660 /* 2a: Disable RC states. */
6661 I915_WRITE(GEN6_RC_CONTROL, 0);
6662
6663 /* 2b: Program RC6 thresholds.*/
6664
6665 /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */
6666 if (IS_SKYLAKE(dev_priv))
6667 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
6668 else
6669 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
6670 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
6671 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
6672 for_each_engine(engine, dev_priv, id)
6673 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
6674
6675 if (HAS_GUC(dev_priv))
6676 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
6677
6678 I915_WRITE(GEN6_RC_SLEEP, 0);
6679
6680 /* 2c: Program Coarse Power Gating Policies. */
6681 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25);
6682 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25);
6683
6684 /* 3a: Enable RC6 */
6685 if (intel_rc6_enabled() & INTEL_RC6_ENABLE)
6686 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
6687 DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE));
6688 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
6689
6690 /* WaRsUseTimeoutMode:cnl (pre-prod) */
6691 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
6692 rc6_mode = GEN7_RC_CTL_TO_MODE;
6693 else
6694 rc6_mode = GEN6_RC_CTL_EI_MODE(1);
6695
6696 I915_WRITE(GEN6_RC_CONTROL,
6697 GEN6_RC_CTL_HW_ENABLE | rc6_mode | rc6_mask);
6698
6699 /*
6700 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
6701 * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6.
6702 */
6703 if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
6704 I915_WRITE(GEN9_PG_ENABLE, 0);
6705 else
6706 I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
6707 (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0);
6708
6709 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6710 }
6711
6712 static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
6713 {
6714 struct intel_engine_cs *engine;
6715 enum intel_engine_id id;
6716 uint32_t rc6_mask = 0;
6717
6718 /* 1a: Software RC state - RC0 */
6719 I915_WRITE(GEN6_RC_STATE, 0);
6720
6721 /* 1b: Get forcewake during program sequence. Although the driver
6722 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
6723 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6724
6725 /* 2a: Disable RC states. */
6726 I915_WRITE(GEN6_RC_CONTROL, 0);
6727
6728 /* 2b: Program RC6 thresholds.*/
6729 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
6730 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
6731 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
6732 for_each_engine(engine, dev_priv, id)
6733 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
6734 I915_WRITE(GEN6_RC_SLEEP, 0);
6735 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
6736
6737 /* 3: Enable RC6 */
6738 if (intel_rc6_enabled() & INTEL_RC6_ENABLE)
6739 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
6740 intel_print_rc6_info(dev_priv, rc6_mask);
6741
6742 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
6743 GEN7_RC_CTL_TO_MODE |
6744 rc6_mask);
6745
6746 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6747 }
6748
6749 static void gen8_enable_rps(struct drm_i915_private *dev_priv)
6750 {
6751 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6752
6753 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6754
6755 /* 1 Program defaults and thresholds for RPS*/
6756 I915_WRITE(GEN6_RPNSWREQ,
6757 HSW_FREQUENCY(rps->rp1_freq));
6758 I915_WRITE(GEN6_RC_VIDEO_FREQ,
6759 HSW_FREQUENCY(rps->rp1_freq));
6760 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
6761 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
6762
6763 /* Docs recommend 900MHz, and 300 MHz respectively */
6764 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
6765 rps->max_freq_softlimit << 24 |
6766 rps->min_freq_softlimit << 16);
6767
6768 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
6769 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
6770 I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
6771 I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
6772
6773 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
6774
6775 /* 2: Enable RPS */
6776 I915_WRITE(GEN6_RP_CONTROL,
6777 GEN6_RP_MEDIA_TURBO |
6778 GEN6_RP_MEDIA_HW_NORMAL_MODE |
6779 GEN6_RP_MEDIA_IS_GFX |
6780 GEN6_RP_ENABLE |
6781 GEN6_RP_UP_BUSY_AVG |
6782 GEN6_RP_DOWN_IDLE_AVG);
6783
6784 reset_rps(dev_priv, gen6_set_rps);
6785
6786 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6787 }
6788
6789 static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
6790 {
6791 struct intel_engine_cs *engine;
6792 enum intel_engine_id id;
6793 u32 rc6vids, rc6_mask = 0;
6794 u32 gtfifodbg;
6795 int rc6_mode;
6796 int ret;
6797
6798 I915_WRITE(GEN6_RC_STATE, 0);
6799
6800 /* Clear the DBG now so we don't confuse earlier errors */
6801 gtfifodbg = I915_READ(GTFIFODBG);
6802 if (gtfifodbg) {
6803 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
6804 I915_WRITE(GTFIFODBG, gtfifodbg);
6805 }
6806
6807 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6808
6809 /* disable the counters and set deterministic thresholds */
6810 I915_WRITE(GEN6_RC_CONTROL, 0);
6811
6812 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
6813 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
6814 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
6815 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
6816 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
6817
6818 for_each_engine(engine, dev_priv, id)
6819 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
6820
6821 I915_WRITE(GEN6_RC_SLEEP, 0);
6822 I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
6823 if (IS_IVYBRIDGE(dev_priv))
6824 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
6825 else
6826 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
6827 I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
6828 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
6829
6830 /* Check if we are enabling RC6 */
6831 rc6_mode = intel_rc6_enabled();
6832 if (rc6_mode & INTEL_RC6_ENABLE)
6833 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
6834
6835 /* We don't use those on Haswell */
6836 if (!IS_HASWELL(dev_priv)) {
6837 if (rc6_mode & INTEL_RC6p_ENABLE)
6838 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
6839
6840 if (rc6_mode & INTEL_RC6pp_ENABLE)
6841 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
6842 }
6843
6844 intel_print_rc6_info(dev_priv, rc6_mask);
6845
6846 I915_WRITE(GEN6_RC_CONTROL,
6847 rc6_mask |
6848 GEN6_RC_CTL_EI_MODE(1) |
6849 GEN6_RC_CTL_HW_ENABLE);
6850
6851 rc6vids = 0;
6852 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
6853 if (IS_GEN6(dev_priv) && ret) {
6854 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
6855 } else if (IS_GEN6(dev_priv) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
6856 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
6857 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
6858 rc6vids &= 0xffff00;
6859 rc6vids |= GEN6_ENCODE_RC6_VID(450);
6860 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
6861 if (ret)
6862 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
6863 }
6864
6865 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6866 }
6867
6868 static void gen6_enable_rps(struct drm_i915_private *dev_priv)
6869 {
6870 /* Here begins a magic sequence of register writes to enable
6871 * auto-downclocking.
6872 *
6873 * Perhaps there might be some value in exposing these to
6874 * userspace...
6875 */
6876 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6877
6878 /* Power down if completely idle for over 50ms */
6879 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
6880 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
6881
6882 reset_rps(dev_priv, gen6_set_rps);
6883
6884 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6885 }
6886
6887 static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
6888 {
6889 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6890 int min_freq = 15;
6891 unsigned int gpu_freq;
6892 unsigned int max_ia_freq, min_ring_freq;
6893 unsigned int max_gpu_freq, min_gpu_freq;
6894 int scaling_factor = 180;
6895 struct cpufreq_policy *policy;
6896
6897 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
6898
6899 policy = cpufreq_cpu_get(0);
6900 if (policy) {
6901 max_ia_freq = policy->cpuinfo.max_freq;
6902 cpufreq_cpu_put(policy);
6903 } else {
6904 /*
6905 * Default to measured freq if none found, PCU will ensure we
6906 * don't go over
6907 */
6908 max_ia_freq = tsc_khz;
6909 }
6910
6911 /* Convert from kHz to MHz */
6912 max_ia_freq /= 1000;
6913
6914 min_ring_freq = I915_READ(DCLK) & 0xf;
6915 /* convert DDR frequency from units of 266.6MHz to bandwidth */
6916 min_ring_freq = mult_frac(min_ring_freq, 8, 3);
6917
6918 if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
6919 /* Convert GT frequency to 50 MHz units */
6920 min_gpu_freq = rps->min_freq / GEN9_FREQ_SCALER;
6921 max_gpu_freq = rps->max_freq / GEN9_FREQ_SCALER;
6922 } else {
6923 min_gpu_freq = rps->min_freq;
6924 max_gpu_freq = rps->max_freq;
6925 }
6926
6927 /*
6928 * For each potential GPU frequency, load a ring frequency we'd like
6929 * to use for memory access. We do this by specifying the IA frequency
6930 * the PCU should use as a reference to determine the ring frequency.
6931 */
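/*
 * Worked example with assumed numbers: on Haswell with min_ring_freq
 * = 8 and gpu_freq = 20 (1000 MHz in 50 MHz units), ring_freq =
 * mult_frac(20, 5, 4) = 25, then max(8, 25) = 25, while ia_freq stays
 * 0 so cpufreq keeps choosing the IA frequency.
 */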
6932 for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
6933 int diff = max_gpu_freq - gpu_freq;
6934 unsigned int ia_freq = 0, ring_freq = 0;
6935
6936 if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
6937 /*
6938 * ring_freq = 2 * GT. ring_freq is in 100MHz units
6939 * No floor required for ring frequency on SKL.
6940 */
6941 ring_freq = gpu_freq;
6942 } else if (INTEL_INFO(dev_priv)->gen >= 8) {
6943 /* max(2 * GT, DDR). NB: GT is 50MHz units */
6944 ring_freq = max(min_ring_freq, gpu_freq);
6945 } else if (IS_HASWELL(dev_priv)) {
6946 ring_freq = mult_frac(gpu_freq, 5, 4);
6947 ring_freq = max(min_ring_freq, ring_freq);
6948 /* leave ia_freq as the default, chosen by cpufreq */
6949 } else {
6950 /* On older processors, there is no separate ring
6951 * clock domain, so in order to boost the bandwidth
6952 * of the ring, we need to upclock the CPU (ia_freq).
6953 *
6954 * For GPU frequencies less than 750MHz,
6955 * just use the lowest ring freq.
6956 */
6957 if (gpu_freq < min_freq)
6958 ia_freq = 800;
6959 else
6960 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
6961 ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
6962 }
6963
6964 sandybridge_pcode_write(dev_priv,
6965 GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
6966 ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
6967 ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
6968 gpu_freq);
6969 }
6970 }
6971
6972 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
6973 {
6974 u32 val, rp0;
6975
6976 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
6977
6978 switch (INTEL_INFO(dev_priv)->sseu.eu_total) {
6979 case 8:
6980 /* (2 * 4) config */
6981 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
6982 break;
6983 case 12:
6984 /* (2 * 6) config */
6985 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
6986 break;
6987 case 16:
6988 /* (2 * 8) config */
6989 default:
6990 /* Setting (2 * 8) Min RP0 for any other combination */
6991 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
6992 break;
6993 }
6994
6995 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
6996
6997 return rp0;
6998 }
6999
7000 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7001 {
7002 u32 val, rpe;
7003
7004 val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
7005 rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
7006
7007 return rpe;
7008 }
7009
7010 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
7011 {
7012 u32 val, rp1;
7013
7014 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7015 rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
7016
7017 return rp1;
7018 }
7019
7020 static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
7021 {
7022 u32 val, rpn;
7023
7024 val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
7025 rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
7026 FB_GFX_FREQ_FUSE_MASK);
7027
7028 return rpn;
7029 }
7030
7031 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
7032 {
7033 u32 val, rp1;
7034
7035 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7036
7037 rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
7038
7039 return rp1;
7040 }
7041
7042 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
7043 {
7044 u32 val, rp0;
7045
7046 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7047
7048 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
7049 /* Clamp to max */
7050 rp0 = min_t(u32, rp0, 0xea);
7051
7052 return rp0;
7053 }
7054
7055 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7056 {
7057 u32 val, rpe;
7058
7059 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
7060 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
7061 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
7062 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
7063
7064 return rpe;
7065 }
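/*
 * Illustrative example (assuming the LO fuse field is 5 bits wide, which is
 * what the "<< 5" above implies): with a LO field of 0x12 and a HI field of
 * 0x3, rpe = 0x12 | (0x3 << 5) = 0x72.
 */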
7066
7067 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
7068 {
7069 u32 val;
7070
7071 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
7072 /*
7073 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
7074 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
7075 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
7076 * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
7077 * to make sure it matches what Punit accepts.
7078 */
7079 return max_t(u32, val, 0xc0);
7080 }
7081
7082 /* Check that the pctx buffer wasn't moved under us. */
7083 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
7084 {
7085 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7086
7087 WARN_ON(pctx_addr != dev_priv->mm.stolen_base +
7088 dev_priv->vlv_pctx->stolen->start);
7089 }
7090
7091
7092 /* Check that the pcbr address is not empty. */
7093 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
7094 {
7095 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7096
7097 WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
7098 }
7099
7100 static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
7101 {
7102 struct i915_ggtt *ggtt = &dev_priv->ggtt;
7103 unsigned long pctx_paddr, paddr;
7104 u32 pcbr;
7105 int pctx_size = 32*1024;
7106
7107 pcbr = I915_READ(VLV_PCBR);
7108 if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
7109 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7110 paddr = (dev_priv->mm.stolen_base +
7111 (ggtt->stolen_size - pctx_size));
7112
7113 pctx_paddr = (paddr & (~4095));
7114 I915_WRITE(VLV_PCBR, pctx_paddr);
7115 }
7116
7117 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7118 }
7119
7120 static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
7121 {
7122 struct drm_i915_gem_object *pctx;
7123 unsigned long pctx_paddr;
7124 u32 pcbr;
7125 int pctx_size = 24*1024;
7126
7127 pcbr = I915_READ(VLV_PCBR);
7128 if (pcbr) {
7129 /* BIOS set it up already, grab the pre-alloc'd space */
7130 int pcbr_offset;
7131
7132 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
7133 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
7134 pcbr_offset,
7135 I915_GTT_OFFSET_NONE,
7136 pctx_size);
7137 goto out;
7138 }
7139
7140 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7141
7142 /*
7143 * From the Gunit register HAS:
7144 * The Gfx driver is expected to program this register and ensure
7145 * proper allocation within Gfx stolen memory. For example, this
7146 * register should be programmed such that the PCBR range does not
7147 * overlap with other ranges, such as the frame buffer, protected
7148 * memory, or any other relevant ranges.
7149 */
7150 pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
7151 if (!pctx) {
7152 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
7153 goto out;
7154 }
7155
7156 pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
7157 I915_WRITE(VLV_PCBR, pctx_paddr);
7158
7159 out:
7160 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7161 dev_priv->vlv_pctx = pctx;
7162 }
7163
7164 static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
7165 {
7166 if (WARN_ON(!dev_priv->vlv_pctx))
7167 return;
7168
7169 i915_gem_object_put(dev_priv->vlv_pctx);
7170 dev_priv->vlv_pctx = NULL;
7171 }
7172
7173 static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
7174 {
7175 dev_priv->gt_pm.rps.gpll_ref_freq =
7176 vlv_get_cck_clock(dev_priv, "GPLL ref",
7177 CCK_GPLL_CLOCK_CONTROL,
7178 dev_priv->czclk_freq);
7179
7180 DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
7181 dev_priv->gt_pm.rps.gpll_ref_freq);
7182 }
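/*
 * Note: on VLV/CHV this GPLL reference clock is what intel_gpu_freq() and
 * intel_freq_opcode() use to convert between the raw PUnit ratio values
 * kept in struct intel_rps and MHz, hence the "%d MHz (%u)" debug prints
 * below showing both forms.
 */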
7183
7184 static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
7185 {
7186 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7187 u32 val;
7188
7189 valleyview_setup_pctx(dev_priv);
7190
7191 vlv_init_gpll_ref_freq(dev_priv);
7192
7193 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7194 switch ((val >> 6) & 3) {
7195 case 0:
7196 case 1:
7197 dev_priv->mem_freq = 800;
7198 break;
7199 case 2:
7200 dev_priv->mem_freq = 1066;
7201 break;
7202 case 3:
7203 dev_priv->mem_freq = 1333;
7204 break;
7205 }
7206 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7207
7208 rps->max_freq = valleyview_rps_max_freq(dev_priv);
7209 rps->rp0_freq = rps->max_freq;
7210 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7211 intel_gpu_freq(dev_priv, rps->max_freq),
7212 rps->max_freq);
7213
7214 rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
7215 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7216 intel_gpu_freq(dev_priv, rps->efficient_freq),
7217 rps->efficient_freq);
7218
7219 rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
7220 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
7221 intel_gpu_freq(dev_priv, rps->rp1_freq),
7222 rps->rp1_freq);
7223
7224 rps->min_freq = valleyview_rps_min_freq(dev_priv);
7225 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7226 intel_gpu_freq(dev_priv, rps->min_freq),
7227 rps->min_freq);
7228 }
7229
7230 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
7231 {
7232 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7233 u32 val;
7234
7235 cherryview_setup_pctx(dev_priv);
7236
7237 vlv_init_gpll_ref_freq(dev_priv);
7238
7239 mutex_lock(&dev_priv->sb_lock);
7240 val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
7241 mutex_unlock(&dev_priv->sb_lock);
7242
7243 switch ((val >> 2) & 0x7) {
7244 case 3:
7245 dev_priv->mem_freq = 2000;
7246 break;
7247 default:
7248 dev_priv->mem_freq = 1600;
7249 break;
7250 }
7251 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7252
7253 rps->max_freq = cherryview_rps_max_freq(dev_priv);
7254 rps->rp0_freq = rps->max_freq;
7255 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7256 intel_gpu_freq(dev_priv, rps->max_freq),
7257 rps->max_freq);
7258
7259 rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
7260 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7261 intel_gpu_freq(dev_priv, rps->efficient_freq),
7262 rps->efficient_freq);
7263
7264 rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
7265 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
7266 intel_gpu_freq(dev_priv, rps->rp1_freq),
7267 rps->rp1_freq);
7268
7269 rps->min_freq = cherryview_rps_min_freq(dev_priv);
7270 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7271 intel_gpu_freq(dev_priv, rps->min_freq),
7272 rps->min_freq);
7273
7274 WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
7275 rps->min_freq) & 1,
7276 "Odd GPU freq values\n");
7277 }
7278
7279 static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
7280 {
7281 valleyview_cleanup_pctx(dev_priv);
7282 }
7283
7284 static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
7285 {
7286 struct intel_engine_cs *engine;
7287 enum intel_engine_id id;
7288 u32 gtfifodbg, rc6_mode = 0, pcbr;
7289
7290 gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
7291 GT_FIFO_FREE_ENTRIES_CHV);
7292 if (gtfifodbg) {
7293 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7294 gtfifodbg);
7295 I915_WRITE(GTFIFODBG, gtfifodbg);
7296 }
7297
7298 cherryview_check_pctx(dev_priv);
7299
7300 /* 1a & 1b: Get forcewake during program sequence. Although the driver
7301 * hasn't enabled a state yet where we need forcewake, BIOS may have. */
7302 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7303
7304 /* Disable RC states. */
7305 I915_WRITE(GEN6_RC_CONTROL, 0);
7306
7307 /* 2a: Program RC6 thresholds.*/
7308 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7309 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7310 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7311
7312 for_each_engine(engine, dev_priv, id)
7313 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7314 I915_WRITE(GEN6_RC_SLEEP, 0);
7315
7316 /* TO threshold set to 500 us (0x186 * 1.28 us) */
7317 I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
7318
7319 /* Allows RC6 residency counter to work */
7320 I915_WRITE(VLV_COUNTER_CONTROL,
7321 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7322 VLV_MEDIA_RC6_COUNT_EN |
7323 VLV_RENDER_RC6_COUNT_EN));
7324
7325 /* For now we assume BIOS is allocating and populating the PCBR */
7326 pcbr = I915_READ(VLV_PCBR);
7327
7328 /* 3: Enable RC6 */
7329 if ((intel_rc6_enabled() & INTEL_RC6_ENABLE) &&
7330 (pcbr >> VLV_PCBR_ADDR_SHIFT))
7331 rc6_mode = GEN7_RC_CTL_TO_MODE;
7332
7333 I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
7334
7335 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7336 }
7337
7338 static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
7339 {
7340 u32 val;
7341
7342 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7343
7344 /* 1: Program defaults and thresholds for RPS*/
7345 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7346 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7347 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7348 I915_WRITE(GEN6_RP_UP_EI, 66000);
7349 I915_WRITE(GEN6_RP_DOWN_EI, 350000);
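/*
 * For reference: with an up evaluation interval of 66000 and an up
 * threshold of 59400 the GPU must be busy for roughly 90% of the interval
 * before an up event fires; likewise 245000/350000 corresponds to roughly
 * 70% in the down direction (assuming the thresholds are counted against
 * the EI, as on other GEN6+ RPS setups).
 */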
7350
7351 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7352
7353 /* 2: Enable RPS */
7354 I915_WRITE(GEN6_RP_CONTROL,
7355 GEN6_RP_MEDIA_HW_NORMAL_MODE |
7356 GEN6_RP_MEDIA_IS_GFX |
7357 GEN6_RP_ENABLE |
7358 GEN6_RP_UP_BUSY_AVG |
7359 GEN6_RP_DOWN_IDLE_AVG);
7360
7361 /* Setting Fixed Bias */
7362 val = VLV_OVERRIDE_EN |
7363 VLV_SOC_TDP_EN |
7364 CHV_BIAS_CPU_50_SOC_50;
7365 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7366
7367 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7368
7369 /* RPS code assumes GPLL is used */
7370 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7371
7372 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7373 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7374
7375 reset_rps(dev_priv, valleyview_set_rps);
7376
7377 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7378 }
7379
7380 static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
7381 {
7382 struct intel_engine_cs *engine;
7383 enum intel_engine_id id;
7384 u32 gtfifodbg, rc6_mode = 0;
7385
7386 valleyview_check_pctx(dev_priv);
7387
7388 gtfifodbg = I915_READ(GTFIFODBG);
7389 if (gtfifodbg) {
7390 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7391 gtfifodbg);
7392 I915_WRITE(GTFIFODBG, gtfifodbg);
7393 }
7394
7395 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7396
7397 /* Disable RC states. */
7398 I915_WRITE(GEN6_RC_CONTROL, 0);
7399
7400 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
7401 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7402 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7403
7404 for_each_engine(engine, dev_priv, id)
7405 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7406
7407 I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
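/* 0x557 = 1367; assuming the same 1.28 us units as the CHV path above,
 * this is a promotion timeout of roughly 1750 us. */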
7408
7409 /* Allows RC6 residency counter to work */
7410 I915_WRITE(VLV_COUNTER_CONTROL,
7411 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7412 VLV_MEDIA_RC0_COUNT_EN |
7413 VLV_RENDER_RC0_COUNT_EN |
7414 VLV_MEDIA_RC6_COUNT_EN |
7415 VLV_RENDER_RC6_COUNT_EN));
7416
7417 if (intel_rc6_enabled() & INTEL_RC6_ENABLE)
7418 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
7419
7420 intel_print_rc6_info(dev_priv, rc6_mode);
7421
7422 I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
7423
7424 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7425 }
7426
7427 static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
7428 {
7429 u32 val;
7430
7431 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7432
7433 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7434 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7435 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7436 I915_WRITE(GEN6_RP_UP_EI, 66000);
7437 I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7438
7439 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7440
7441 I915_WRITE(GEN6_RP_CONTROL,
7442 GEN6_RP_MEDIA_TURBO |
7443 GEN6_RP_MEDIA_HW_NORMAL_MODE |
7444 GEN6_RP_MEDIA_IS_GFX |
7445 GEN6_RP_ENABLE |
7446 GEN6_RP_UP_BUSY_AVG |
7447 GEN6_RP_DOWN_IDLE_CONT);
7448
7449 /* Setting Fixed Bias */
7450 val = VLV_OVERRIDE_EN |
7451 VLV_SOC_TDP_EN |
7452 VLV_BIAS_CPU_125_SOC_875;
7453 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7454
7455 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7456
7457 /* RPS code assumes GPLL is used */
7458 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7459
7460 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7461 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7462
7463 reset_rps(dev_priv, valleyview_set_rps);
7464
7465 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7466 }
7467
7468 static unsigned long intel_pxfreq(u32 vidfreq)
7469 {
7470 unsigned long freq;
7471 int div = (vidfreq & 0x3f0000) >> 16;
7472 int post = (vidfreq & 0x3000) >> 12;
7473 int pre = (vidfreq & 0x7);
7474
7475 if (!pre)
7476 return 0;
7477
7478 freq = ((div * 133333) / ((1<<post) * pre));
7479
7480 return freq;
7481 }
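/*
 * Illustrative example of the decode above (made-up register value): with
 * div = 8, post = 0 and pre = 1 the result is 8 * 133333 / ((1 << 0) * 1)
 * = 1066664; the 133333 base suggests the value is in kHz (~1066 MHz).
 */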
7482
7483 static const struct cparams {
7484 u16 i;
7485 u16 t;
7486 u16 m;
7487 u16 c;
7488 } cparams[] = {
7489 { 1, 1333, 301, 28664 },
7490 { 1, 1066, 294, 24460 },
7491 { 1, 800, 294, 25192 },
7492 { 0, 1333, 276, 27605 },
7493 { 0, 1066, 276, 27605 },
7494 { 0, 800, 231, 23784 },
7495 };
7496
7497 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
7498 {
7499 u64 total_count, diff, ret;
7500 u32 count1, count2, count3, m = 0, c = 0;
7501 unsigned long now = jiffies_to_msecs(jiffies), diff1;
7502 int i;
7503
7504 lockdep_assert_held(&mchdev_lock);
7505
7506 diff1 = now - dev_priv->ips.last_time1;
7507
7508 /* Prevent division-by-zero if we are asking too fast.
7509 * Also, we don't get interesting results if we are polling
7510 * faster than once in 10ms, so just return the saved value
7511 * in such cases.
7512 */
7513 if (diff1 <= 10)
7514 return dev_priv->ips.chipset_power;
7515
7516 count1 = I915_READ(DMIEC);
7517 count2 = I915_READ(DDREC);
7518 count3 = I915_READ(CSIEC);
7519
7520 total_count = count1 + count2 + count3;
7521
7522 /* FIXME: handle per-counter overflow */
7523 if (total_count < dev_priv->ips.last_count1) {
7524 diff = ~0UL - dev_priv->ips.last_count1;
7525 diff += total_count;
7526 } else {
7527 diff = total_count - dev_priv->ips.last_count1;
7528 }
7529
7530 for (i = 0; i < ARRAY_SIZE(cparams); i++) {
7531 if (cparams[i].i == dev_priv->ips.c_m &&
7532 cparams[i].t == dev_priv->ips.r_t) {
7533 m = cparams[i].m;
7534 c = cparams[i].c;
7535 break;
7536 }
7537 }
7538
7539 diff = div_u64(diff, diff1);
7540 ret = ((m * diff) + c);
7541 ret = div_u64(ret, 10);
7542
7543 dev_priv->ips.last_count1 = total_count;
7544 dev_priv->ips.last_time1 = now;
7545
7546 dev_priv->ips.chipset_power = ret;
7547
7548 return ret;
7549 }
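/*
 * Worked example of the formula above (made-up numbers): with the
 * { 1, 1333, 301, 28664 } cparams row selected and the three energy
 * counters advancing by a total of 50000 over 100 ms, diff = 500 and the
 * result is (301 * 500 + 28664) / 10 = 17916.
 */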
7550
7551 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
7552 {
7553 unsigned long val;
7554
7555 if (INTEL_INFO(dev_priv)->gen != 5)
7556 return 0;
7557
7558 spin_lock_irq(&mchdev_lock);
7559
7560 val = __i915_chipset_val(dev_priv);
7561
7562 spin_unlock_irq(&mchdev_lock);
7563
7564 return val;
7565 }
7566
7567 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
7568 {
7569 unsigned long m, x, b;
7570 u32 tsfs;
7571
7572 tsfs = I915_READ(TSFS);
7573
7574 m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
7575 x = I915_READ8(TR1);
7576
7577 b = tsfs & TSFS_INTR_MASK;
7578
7579 return ((m * x) / 127) - b;
7580 }
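/*
 * Illustrative example (made-up register values): with a TSFS slope of 128,
 * TR1 reading 100 and an intercept of 50, the result is
 * ((128 * 100) / 127) - 50 = 100 - 50 = 50.
 */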
7581
7582 static int _pxvid_to_vd(u8 pxvid)
7583 {
7584 if (pxvid == 0)
7585 return 0;
7586
7587 if (pxvid >= 8 && pxvid < 31)
7588 pxvid = 31;
7589
7590 return (pxvid + 2) * 125;
7591 }
7592
7593 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
7594 {
7595 const int vd = _pxvid_to_vd(pxvid);
7596 const int vm = vd - 1125;
7597
7598 if (INTEL_INFO(dev_priv)->is_mobile)
7599 return vm > 0 ? vm : 0;
7600
7601 return vd;
7602 }
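/*
 * Illustrative example: pxvid = 40 decodes to vd = (40 + 2) * 125 = 5250,
 * and on a mobile part pvid_to_extvid() returns 5250 - 1125 = 4125.  Any
 * pxvid in the 8..30 range is first clamped to 31, so vd = 33 * 125 = 4125.
 */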
7603
7604 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
7605 {
7606 u64 now, diff, diffms;
7607 u32 count;
7608
7609 lockdep_assert_held(&mchdev_lock);
7610
7611 now = ktime_get_raw_ns();
7612 diffms = now - dev_priv->ips.last_time2;
7613 do_div(diffms, NSEC_PER_MSEC);
7614
7615 /* Don't divide by 0 */
7616 if (!diffms)
7617 return;
7618
7619 count = I915_READ(GFXEC);
7620
7621 if (count < dev_priv->ips.last_count2) {
7622 diff = ~0UL - dev_priv->ips.last_count2;
7623 diff += count;
7624 } else {
7625 diff = count - dev_priv->ips.last_count2;
7626 }
7627
7628 dev_priv->ips.last_count2 = count;
7629 dev_priv->ips.last_time2 = now;
7630
7631 /* More magic constants... */
7632 diff = diff * 1181;
7633 diff = div_u64(diff, diffms * 10);
7634 dev_priv->ips.gfx_power = diff;
7635 }
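/*
 * Worked example of the update above (made-up numbers): if GFXEC advanced
 * by 20000 counts over 100 ms, gfx_power becomes 20000 * 1181 / (100 * 10)
 * = 23620 (the 1181 factor is one of the empirical IPS constants).
 */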
7636
7637 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
7638 {
7639 if (INTEL_INFO(dev_priv)->gen != 5)
7640 return;
7641
7642 spin_lock_irq(&mchdev_lock);
7643
7644 __i915_update_gfx_val(dev_priv);
7645
7646 spin_unlock_irq(&mchdev_lock);
7647 }
7648
7649 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
7650 {
7651 unsigned long t, corr, state1, corr2, state2;
7652 u32 pxvid, ext_v;
7653
7654 lockdep_assert_held(&mchdev_lock);
7655
7656 pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
7657 pxvid = (pxvid >> 24) & 0x7f;
7658 ext_v = pvid_to_extvid(dev_priv, pxvid);
7659
7660 state1 = ext_v;
7661
7662 t = i915_mch_val(dev_priv);
7663
7664 /* Revel in the empirically derived constants */
7665
7666 /* Correction factor in 1/100000 units */
7667 if (t > 80)
7668 corr = ((t * 2349) + 135940);
7669 else if (t >= 50)
7670 corr = ((t * 964) + 29317);
7671 else /* < 50 */
7672 corr = ((t * 301) + 1004);
7673
7674 corr = corr * ((150142 * state1) / 10000 - 78642);
7675 corr /= 100000;
7676 corr2 = (corr * dev_priv->ips.corr);
7677
7678 state2 = (corr2 * state1) / 10000;
7679 state2 /= 100; /* convert to mW */
7680
7681 __i915_update_gfx_val(dev_priv);
7682
7683 return dev_priv->ips.gfx_power + state2;
7684 }
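/*
 * Illustrative first step of the correction above: a reading of t = 60
 * falls in the 50..80 band, so corr = 60 * 964 + 29317 = 87157 (in 1/100000
 * units), which is then scaled by the voltage-derived state1 term and by
 * the fused ips.corr factor before being added to the running gfx_power
 * value.
 */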
7685
7686 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
7687 {
7688 unsigned long val;
7689
7690 if (INTEL_INFO(dev_priv)->gen != 5)
7691 return 0;
7692
7693 spin_lock_irq(&mchdev_lock);
7694
7695 val = __i915_gfx_val(dev_priv);
7696
7697 spin_unlock_irq(&mchdev_lock);
7698
7699 return val;
7700 }
7701
7702 /**
7703 * i915_read_mch_val - return value for IPS use
7704 *
7705 * Calculate and return a value for the IPS driver to use when deciding whether
7706 * we have thermal and power headroom to increase CPU or GPU power budget.
7707 */
7708 unsigned long i915_read_mch_val(void)
7709 {
7710 struct drm_i915_private *dev_priv;
7711 unsigned long chipset_val, graphics_val, ret = 0;
7712
7713 spin_lock_irq(&mchdev_lock);
7714 if (!i915_mch_dev)
7715 goto out_unlock;
7716 dev_priv = i915_mch_dev;
7717
7718 chipset_val = __i915_chipset_val(dev_priv);
7719 graphics_val = __i915_gfx_val(dev_priv);
7720
7721 ret = chipset_val + graphics_val;
7722
7723 out_unlock:
7724 spin_unlock_irq(&mchdev_lock);
7725
7726 return ret;
7727 }
7728 EXPORT_SYMBOL_GPL(i915_read_mch_val);
7729
7730 /**
7731 * i915_gpu_raise - raise GPU frequency limit
7732 *
7733 * Raise the limit; IPS indicates we have thermal headroom.
7734 */
7735 bool i915_gpu_raise(void)
7736 {
7737 struct drm_i915_private *dev_priv;
7738 bool ret = true;
7739
7740 spin_lock_irq(&mchdev_lock);
7741 if (!i915_mch_dev) {
7742 ret = false;
7743 goto out_unlock;
7744 }
7745 dev_priv = i915_mch_dev;
7746
7747 if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
7748 dev_priv->ips.max_delay--;
7749
7750 out_unlock:
7751 spin_unlock_irq(&mchdev_lock);
7752
7753 return ret;
7754 }
7755 EXPORT_SYMBOL_GPL(i915_gpu_raise);
7756
7757 /**
7758 * i915_gpu_lower - lower GPU frequency limit
7759 *
7760 * IPS indicates we're close to a thermal limit, so throttle back the GPU
7761 * frequency maximum.
7762 */
7763 bool i915_gpu_lower(void)
7764 {
7765 struct drm_i915_private *dev_priv;
7766 bool ret = true;
7767
7768 spin_lock_irq(&mchdev_lock);
7769 if (!i915_mch_dev) {
7770 ret = false;
7771 goto out_unlock;
7772 }
7773 dev_priv = i915_mch_dev;
7774
7775 if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
7776 dev_priv->ips.max_delay++;
7777
7778 out_unlock:
7779 spin_unlock_irq(&mchdev_lock);
7780
7781 return ret;
7782 }
7783 EXPORT_SYMBOL_GPL(i915_gpu_lower);
7784
7785 /**
7786 * i915_gpu_busy - indicate GPU business to IPS
7787 *
7788 * Tell the IPS driver whether or not the GPU is busy.
7789 */
7790 bool i915_gpu_busy(void)
7791 {
7792 bool ret = false;
7793
7794 spin_lock_irq(&mchdev_lock);
7795 if (i915_mch_dev)
7796 ret = i915_mch_dev->gt.awake;
7797 spin_unlock_irq(&mchdev_lock);
7798
7799 return ret;
7800 }
7801 EXPORT_SYMBOL_GPL(i915_gpu_busy);
7802
7803 /**
7804 * i915_gpu_turbo_disable - disable graphics turbo
7805 *
7806 * Disable graphics turbo by resetting the max frequency and setting the
7807 * current frequency to the default.
7808 */
7809 bool i915_gpu_turbo_disable(void)
7810 {
7811 struct drm_i915_private *dev_priv;
7812 bool ret = true;
7813
7814 spin_lock_irq(&mchdev_lock);
7815 if (!i915_mch_dev) {
7816 ret = false;
7817 goto out_unlock;
7818 }
7819 dev_priv = i915_mch_dev;
7820
7821 dev_priv->ips.max_delay = dev_priv->ips.fstart;
7822
7823 if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart))
7824 ret = false;
7825
7826 out_unlock:
7827 spin_unlock_irq(&mchdev_lock);
7828
7829 return ret;
7830 }
7831 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
7832
7833 /**
7834 * Tells the intel_ips driver that the i915 driver is now loaded, if
7835 * IPS got loaded first.
7836 *
7837 * This awkward dance is so that neither module has to depend on the
7838 * other in order for IPS to do the appropriate communication of
7839 * GPU turbo limits to i915.
7840 */
7841 static void
7842 ips_ping_for_i915_load(void)
7843 {
7844 void (*link)(void);
7845
7846 link = symbol_get(ips_link_to_i915_driver);
7847 if (link) {
7848 link();
7849 symbol_put(ips_link_to_i915_driver);
7850 }
7851 }
7852
7853 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
7854 {
7855 /* We only register the i915 ips part with intel-ips once everything is
7856 * set up, to avoid intel-ips sneaking in and reading bogus values. */
7857 spin_lock_irq(&mchdev_lock);
7858 i915_mch_dev = dev_priv;
7859 spin_unlock_irq(&mchdev_lock);
7860
7861 ips_ping_for_i915_load();
7862 }
7863
7864 void intel_gpu_ips_teardown(void)
7865 {
7866 spin_lock_irq(&mchdev_lock);
7867 i915_mch_dev = NULL;
7868 spin_unlock_irq(&mchdev_lock);
7869 }
7870
7871 static void intel_init_emon(struct drm_i915_private *dev_priv)
7872 {
7873 u32 lcfuse;
7874 u8 pxw[16];
7875 int i;
7876
7877 /* Disable PMON while programming the energy weights */
7878 I915_WRITE(ECR, 0);
7879 POSTING_READ(ECR);
7880
7881 /* Program energy weights for various events */
7882 I915_WRITE(SDEW, 0x15040d00);
7883 I915_WRITE(CSIEW0, 0x007f0000);
7884 I915_WRITE(CSIEW1, 0x1e220004);
7885 I915_WRITE(CSIEW2, 0x04000004);
7886
7887 for (i = 0; i < 5; i++)
7888 I915_WRITE(PEW(i), 0);
7889 for (i = 0; i < 3; i++)
7890 I915_WRITE(DEW(i), 0);
7891
7892 /* Program P-state weights to account for frequency power adjustment */
7893 for (i = 0; i < 16; i++) {
7894 u32 pxvidfreq = I915_READ(PXVFREQ(i));
7895 unsigned long freq = intel_pxfreq(pxvidfreq);
7896 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
7897 PXVFREQ_PX_SHIFT;
7898 unsigned long val;
7899
7900 val = vid * vid;
7901 val *= (freq / 1000);
7902 val *= 255;
7903 val /= (127*127*900);
7904 if (val > 0xff)
7905 DRM_ERROR("bad pxval: %ld\n", val);
7906 pxw[i] = val;
7907 }
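/*
 * Worked example of the weight formula above (made-up PXVFREQ decode): with
 * vid = 100 and freq = 1066664 kHz, val = 100 * 100 * 1066 * 255
 * / (127 * 127 * 900) = 2718300000 / 14516100 = 187, which fits in the
 * 8-bit weight field.
 */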
7908 /* Render standby states get 0 weight */
7909 pxw[14] = 0;
7910 pxw[15] = 0;
7911
7912 for (i = 0; i < 4; i++) {
7913 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
7914 (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
7915 I915_WRITE(PXW(i), val);
7916 }
7917
7918 /* Adjust magic regs to magic values (more experimental results) */
7919 I915_WRITE(OGW0, 0);
7920 I915_WRITE(OGW1, 0);
7921 I915_WRITE(EG0, 0x00007f00);
7922 I915_WRITE(EG1, 0x0000000e);
7923 I915_WRITE(EG2, 0x000e0000);
7924 I915_WRITE(EG3, 0x68000300);
7925 I915_WRITE(EG4, 0x42000000);
7926 I915_WRITE(EG5, 0x00140031);
7927 I915_WRITE(EG6, 0);
7928 I915_WRITE(EG7, 0);
7929
7930 for (i = 0; i < 8; i++)
7931 I915_WRITE(PXWL(i), 0);
7932
7933 /* Enable PMON + select events */
7934 I915_WRITE(ECR, 0x80000019);
7935
7936 lcfuse = I915_READ(LCFUSE02);
7937
7938 dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
7939 }
7940
7941 static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv)
7942 {
7943 return !I915_READ(GEN8_RC6_CTX_INFO);
7944 }
7945
7946 static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915)
7947 {
7948 if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
7949 return;
7950
7951 if (i915_rc6_ctx_corrupted(i915)) {
7952 DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
7953 i915->gt_pm.rc6.ctx_corrupted = true;
7954 intel_runtime_pm_get(i915);
7955 }
7956 }
7957
7958 static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915)
7959 {
7960 if (i915->gt_pm.rc6.ctx_corrupted) {
7961 intel_runtime_pm_put(i915);
7962 i915->gt_pm.rc6.ctx_corrupted = false;
7963 }
7964 }
7965
7966 /**
7967 * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA
7968 * @i915: i915 device
7969 *
7970 * Perform any steps needed to clean up the RC6 CTX WA before system suspend.
7971 */
7972 void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915)
7973 {
7974 if (i915->gt_pm.rc6.ctx_corrupted)
7975 intel_runtime_pm_put(i915);
7976 }
7977
7978 /**
7979 * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
7980 * @i915: i915 device
7981 *
7982 * Perform any steps needed to re-init the RC6 CTX WA after system resume.
7983 */
7984 void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915)
7985 {
7986 if (!i915->gt_pm.rc6.ctx_corrupted)
7987 return;
7988
7989 if (i915_rc6_ctx_corrupted(i915)) {
7990 intel_runtime_pm_get(i915);
7991 return;
7992 }
7993
7994 DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
7995 i915->gt_pm.rc6.ctx_corrupted = false;
7996 }
7997
7998 static void intel_disable_rc6(struct drm_i915_private *dev_priv);
7999
8000 /**
8001 * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption
8002 * @i915: i915 device
8003 *
8004 * Check if an RC6 CTX corruption has happened since the last check and if so
8005 * disable RC6 and runtime power management.
8006 *
8007 * Return false if no context corruption has happened since the last call of
8008 * this function, true otherwise.
8009 */
8010 bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915)
8011 {
8012 if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
8013 return false;
8014
8015 if (i915->gt_pm.rc6.ctx_corrupted)
8016 return false;
8017
8018 if (!i915_rc6_ctx_corrupted(i915))
8019 return false;
8020
8021 DRM_NOTE("RC6 context corruption, disabling runtime power management\n");
8022
8023 intel_disable_rc6(i915);
8024 i915->gt_pm.rc6.ctx_corrupted = true;
8025 intel_runtime_pm_get_noresume(i915);
8026
8027 return true;
8028 }
8029
8030 void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
8031 {
8032 struct intel_rps *rps = &dev_priv->gt_pm.rps;
8033
8034 /*
8035 * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a
8036 * requirement.
8037 */
8038 if (!i915_modparams.enable_rc6) {
8039 DRM_INFO("RC6 disabled, disabling runtime PM support\n");
8040 intel_runtime_pm_get(dev_priv);
8041 }
8042
8043 mutex_lock(&dev_priv->pcu_lock);
8044
8045 i915_rc6_ctx_wa_init(dev_priv);
8046
8047 /* Initialize RPS limits (for userspace) */
8048 if (IS_CHERRYVIEW(dev_priv))
8049 cherryview_init_gt_powersave(dev_priv);
8050 else if (IS_VALLEYVIEW(dev_priv))
8051 valleyview_init_gt_powersave(dev_priv);
8052 else if (INTEL_GEN(dev_priv) >= 6)
8053 gen6_init_rps_frequencies(dev_priv);
8054
8055 /* Derive initial user preferences/limits from the hardware limits */
8056 rps->idle_freq = rps->min_freq;
8057 rps->cur_freq = rps->idle_freq;
8058
8059 rps->max_freq_softlimit = rps->max_freq;
8060 rps->min_freq_softlimit = rps->min_freq;
8061
8062 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
8063 rps->min_freq_softlimit =
8064 max_t(int,
8065 rps->efficient_freq,
8066 intel_freq_opcode(dev_priv, 450));
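/* intel_freq_opcode() converts 450 MHz into the platform's ratio encoding
 * (450 / 50 = 9 on HSW/BDW, assuming the usual 50 MHz GT ratio units), so
 * the soft minimum is the larger of RPe and that ratio. */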
8067
8068 /* After setting max-softlimit, find the overclock max freq */
8069 if (IS_GEN6(dev_priv) ||
8070 IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
8071 u32 params = 0;
8072
8073 sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
8074 if (params & BIT(31)) { /* OC supported */
8075 DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
8076 (rps->max_freq & 0xff) * 50,
8077 (params & 0xff) * 50);
8078 rps->max_freq = params & 0xff;
8079 }
8080 }
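/*
 * Illustrative example (made-up pcode reply): params = 0x80000024 has
 * BIT(31) set, so overclocking is reported as supported and the overclock
 * ceiling is (0x24 & 0xff) * 50 = 1800 MHz; rps->max_freq is then set to
 * the 0x24 ratio.
 */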
8081
8082 /* Finally allow us to boost to max by default */
8083 rps->boost_freq = rps->max_freq;
8084
8085 mutex_unlock(&dev_priv->pcu_lock);
8086
8087 intel_autoenable_gt_powersave(dev_priv);
8088 }
8089
8090 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
8091 {
8092 if (IS_VALLEYVIEW(dev_priv))
8093 valleyview_cleanup_gt_powersave(dev_priv);
8094
8095 i915_rc6_ctx_wa_cleanup(dev_priv);
8096
8097 if (!i915_modparams.enable_rc6)
8098 intel_runtime_pm_put(dev_priv);
8099 }
8100
8101 /**
8102 * intel_suspend_gt_powersave - suspend PM work and helper threads
8103 * @dev_priv: i915 device
8104 *
8105 * We don't want to disable RC6 or other features here, we just want
8106 * to make sure any work we've queued has finished and won't bother
8107 * us while we're suspended.
8108 */
8109 void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
8110 {
8111 if (INTEL_GEN(dev_priv) < 6)
8112 return;
8113
8114 if (cancel_delayed_work_sync(&dev_priv->gt_pm.autoenable_work))
8115 intel_runtime_pm_put(dev_priv);
8116
8117 /* gen6_rps_idle() will be called later to disable interrupts */
8118 }
8119
8120 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
8121 {
8122 dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
8123 dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
8124 intel_disable_gt_powersave(dev_priv);
8125
8126 gen6_reset_rps_interrupts(dev_priv);
8127 }
8128
8129 static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
8130 {
8131 lockdep_assert_held(&i915->pcu_lock);
8132
8133 if (!i915->gt_pm.llc_pstate.enabled)
8134 return;
8135
8136 /* Currently there is no HW configuration to be done to disable. */
8137
8138 i915->gt_pm.llc_pstate.enabled = false;
8139 }
8140
8141 static void __intel_disable_rc6(struct drm_i915_private *dev_priv)
8142 {
8143 lockdep_assert_held(&dev_priv->pcu_lock);
8144
8145 if (!dev_priv->gt_pm.rc6.enabled)
8146 return;
8147
8148 if (INTEL_GEN(dev_priv) >= 9)
8149 gen9_disable_rc6(dev_priv);
8150 else if (IS_CHERRYVIEW(dev_priv))
8151 cherryview_disable_rc6(dev_priv);
8152 else if (IS_VALLEYVIEW(dev_priv))
8153 valleyview_disable_rc6(dev_priv);
8154 else if (INTEL_GEN(dev_priv) >= 6)
8155 gen6_disable_rc6(dev_priv);
8156
8157 dev_priv->gt_pm.rc6.enabled = false;
8158 }
8159
8160 static void intel_disable_rc6(struct drm_i915_private *dev_priv)
8161 {
8162 mutex_lock(&dev_priv->pcu_lock);
8163 __intel_disable_rc6(dev_priv);
8164 mutex_unlock(&dev_priv->pcu_lock);
8165 }
8166
8167 static void intel_disable_rps(struct drm_i915_private *dev_priv)
8168 {
8169 lockdep_assert_held(&dev_priv->pcu_lock);
8170
8171 if (!dev_priv->gt_pm.rps.enabled)
8172 return;
8173
8174 if (INTEL_GEN(dev_priv) >= 9)
8175 gen9_disable_rps(dev_priv);
8176 else if (IS_CHERRYVIEW(dev_priv))
8177 cherryview_disable_rps(dev_priv);
8178 else if (IS_VALLEYVIEW(dev_priv))
8179 valleyview_disable_rps(dev_priv);
8180 else if (INTEL_GEN(dev_priv) >= 6)
8181 gen6_disable_rps(dev_priv);
8182 else if (IS_IRONLAKE_M(dev_priv))
8183 ironlake_disable_drps(dev_priv);
8184
8185 dev_priv->gt_pm.rps.enabled = false;
8186 }
8187
8188 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
8189 {
8190 mutex_lock(&dev_priv->pcu_lock);
8191
8192 __intel_disable_rc6(dev_priv);
8193 intel_disable_rps(dev_priv);
8194 if (HAS_LLC(dev_priv))
8195 intel_disable_llc_pstate(dev_priv);
8196
8197 mutex_unlock(&dev_priv->pcu_lock);
8198 }
8199
8200 static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
8201 {
8202 lockdep_assert_held(&i915->pcu_lock);
8203
8204 if (i915->gt_pm.llc_pstate.enabled)
8205 return;
8206
8207 gen6_update_ring_freq(i915);
8208
8209 i915->gt_pm.llc_pstate.enabled = true;
8210 }
8211
8212 static void intel_enable_rc6(struct drm_i915_private *dev_priv)
8213 {
8214 lockdep_assert_held(&dev_priv->pcu_lock);
8215
8216 if (dev_priv->gt_pm.rc6.enabled)
8217 return;
8218
8219 if (dev_priv->gt_pm.rc6.ctx_corrupted)
8220 return;
8221
8222 if (IS_CHERRYVIEW(dev_priv))
8223 cherryview_enable_rc6(dev_priv);
8224 else if (IS_VALLEYVIEW(dev_priv))
8225 valleyview_enable_rc6(dev_priv);
8226 else if (INTEL_GEN(dev_priv) >= 9)
8227 gen9_enable_rc6(dev_priv);
8228 else if (IS_BROADWELL(dev_priv))
8229 gen8_enable_rc6(dev_priv);
8230 else if (INTEL_GEN(dev_priv) >= 6)
8231 gen6_enable_rc6(dev_priv);
8232
8233 dev_priv->gt_pm.rc6.enabled = true;
8234 }
8235
8236 static void intel_enable_rps(struct drm_i915_private *dev_priv)
8237 {
8238 struct intel_rps *rps = &dev_priv->gt_pm.rps;
8239
8240 lockdep_assert_held(&dev_priv->pcu_lock);
8241
8242 if (rps->enabled)
8243 return;
8244
8245 if (IS_CHERRYVIEW(dev_priv)) {
8246 cherryview_enable_rps(dev_priv);
8247 } else if (IS_VALLEYVIEW(dev_priv)) {
8248 valleyview_enable_rps(dev_priv);
8249 } else if (INTEL_GEN(dev_priv) >= 9) {
8250 gen9_enable_rps(dev_priv);
8251 } else if (IS_BROADWELL(dev_priv)) {
8252 gen8_enable_rps(dev_priv);
8253 } else if (INTEL_GEN(dev_priv) >= 6) {
8254 gen6_enable_rps(dev_priv);
8255 } else if (IS_IRONLAKE_M(dev_priv)) {
8256 ironlake_enable_drps(dev_priv);
8257 intel_init_emon(dev_priv);
8258 }
8259
8260 WARN_ON(rps->max_freq < rps->min_freq);
8261 WARN_ON(rps->idle_freq > rps->max_freq);
8262
8263 WARN_ON(rps->efficient_freq < rps->min_freq);
8264 WARN_ON(rps->efficient_freq > rps->max_freq);
8265
8266 rps->enabled = true;
8267 }
8268
8269 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
8270 {
8271 /* Powersaving is controlled by the host when inside a VM */
8272 if (intel_vgpu_active(dev_priv))
8273 return;
8274
8275 mutex_lock(&dev_priv->pcu_lock);
8276
8277 intel_enable_rc6(dev_priv);
8278 intel_enable_rps(dev_priv);
8279 if (HAS_LLC(dev_priv))
8280 intel_enable_llc_pstate(dev_priv);
8281
8282 mutex_unlock(&dev_priv->pcu_lock);
8283 }
8284
8285 static void __intel_autoenable_gt_powersave(struct work_struct *work)
8286 {
8287 struct drm_i915_private *dev_priv =
8288 container_of(work,
8289 typeof(*dev_priv),
8290 gt_pm.autoenable_work.work);
8291 struct intel_engine_cs *rcs;
8292 struct drm_i915_gem_request *req;
8293
8294 rcs = dev_priv->engine[RCS];
8295 if (rcs->last_retired_context)
8296 goto out;
8297
8298 if (!rcs->init_context)
8299 goto out;
8300
8301 mutex_lock(&dev_priv->drm.struct_mutex);
8302
8303 req = i915_gem_request_alloc(rcs, dev_priv->kernel_context);
8304 if (IS_ERR(req))
8305 goto unlock;
8306
8307 if (!i915_modparams.enable_execlists && i915_switch_context(req) == 0)
8308 rcs->init_context(req);
8309
8310 /* Mark the device busy, calling intel_enable_gt_powersave() */
8311 i915_add_request(req);
8312
8313 unlock:
8314 mutex_unlock(&dev_priv->drm.struct_mutex);
8315 out:
8316 intel_runtime_pm_put(dev_priv);
8317 }
8318
8319 void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv)
8320 {
8321 if (IS_IRONLAKE_M(dev_priv)) {
8322 ironlake_enable_drps(dev_priv);
8323 intel_init_emon(dev_priv);
8324 } else if (INTEL_INFO(dev_priv)->gen >= 6) {
8325 /*
8326 * PCU communication is slow and this doesn't need to be
8327 * done at any specific time, so do this out of our fast path
8328 * to make resume and init faster.
8329 *
8330 * We depend on the HW RC6 power context save/restore
8331 * mechanism when entering D3 through runtime PM suspend. So
8332 * disable RPM until RPS/RC6 is properly setup. We can only
8333 * get here via the driver load/system resume/runtime resume
8334 * paths, so the _noresume version is enough (and in case of
8335 * runtime resume it's necessary).
8336 */
8337 if (queue_delayed_work(dev_priv->wq,
8338 &dev_priv->gt_pm.autoenable_work,
8339 round_jiffies_up_relative(HZ)))
8340 intel_runtime_pm_get_noresume(dev_priv);
8341 }
8342 }
8343
8344 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
8345 {
8346 /*
8347 * On Ibex Peak and Cougar Point, we need to disable clock
8348 * gating for the panel power sequencer or it will fail to
8349 * start up when no ports are active.
8350 */
8351 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
8352 }
8353
8354 static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
8355 {
8356 enum pipe pipe;
8357
8358 for_each_pipe(dev_priv, pipe) {
8359 I915_WRITE(DSPCNTR(pipe),
8360 I915_READ(DSPCNTR(pipe)) |
8361 DISPPLANE_TRICKLE_FEED_DISABLE);
8362
8363 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
8364 POSTING_READ(DSPSURF(pipe));
8365 }
8366 }
8367
8368 static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
8369 {
8370 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8371
8372 /*
8373 * Required for FBC
8374 * WaFbcDisableDpfcClockGating:ilk
8375 */
8376 dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
8377 ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
8378 ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
8379
8380 I915_WRITE(PCH_3DCGDIS0,
8381 MARIUNIT_CLOCK_GATE_DISABLE |
8382 SVSMUNIT_CLOCK_GATE_DISABLE);
8383 I915_WRITE(PCH_3DCGDIS1,
8384 VFMUNIT_CLOCK_GATE_DISABLE);
8385
8386 /*
8387 * According to the spec the following bits should be set in
8388 * order to enable memory self-refresh
8389 * The bit 22/21 of 0x42004
8390 * The bit 5 of 0x42020
8391 * The bit 15 of 0x45000
8392 */
8393 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8394 (I915_READ(ILK_DISPLAY_CHICKEN2) |
8395 ILK_DPARB_GATE | ILK_VSDPFD_FULL));
8396 dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
8397 I915_WRITE(DISP_ARB_CTL,
8398 (I915_READ(DISP_ARB_CTL) |
8399 DISP_FBC_WM_DIS));
8400
8401 /*
8402 * Based on the document from hardware guys the following bits
8403 * should be set unconditionally in order to enable FBC.
8404 * The bit 22 of 0x42000
8405 * The bit 22 of 0x42004
8406 * The bit 7,8,9 of 0x42020.
8407 */
8408 if (IS_IRONLAKE_M(dev_priv)) {
8409 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
8410 I915_WRITE(ILK_DISPLAY_CHICKEN1,
8411 I915_READ(ILK_DISPLAY_CHICKEN1) |
8412 ILK_FBCQ_DIS);
8413 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8414 I915_READ(ILK_DISPLAY_CHICKEN2) |
8415 ILK_DPARB_GATE);
8416 }
8417
8418 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8419
8420 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8421 I915_READ(ILK_DISPLAY_CHICKEN2) |
8422 ILK_ELPIN_409_SELECT);
8423 I915_WRITE(_3D_CHICKEN2,
8424 _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
8425 _3D_CHICKEN2_WM_READ_PIPELINED);
8426
8427 /* WaDisableRenderCachePipelinedFlush:ilk */
8428 I915_WRITE(CACHE_MODE_0,
8429 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
8430
8431 /* WaDisable_RenderCache_OperationalFlush:ilk */
8432 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8433
8434 g4x_disable_trickle_feed(dev_priv);
8435
8436 ibx_init_clock_gating(dev_priv);
8437 }
8438
8439 static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
8440 {
8441 int pipe;
8442 uint32_t val;
8443
8444 /*
8445 * On Ibex Peak and Cougar Point, we need to disable clock
8446 * gating for the panel power sequencer or it will fail to
8447 * start up when no ports are active.
8448 */
8449 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
8450 PCH_DPLUNIT_CLOCK_GATE_DISABLE |
8451 PCH_CPUNIT_CLOCK_GATE_DISABLE);
8452 I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
8453 DPLS_EDP_PPS_FIX_DIS);
8454 /* The below fixes the weird display corruption, a few pixels shifted
8455 * downward, on (only) LVDS of some HP laptops with IVY.
8456 */
8457 for_each_pipe(dev_priv, pipe) {
8458 val = I915_READ(TRANS_CHICKEN2(pipe));
8459 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
8460 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8461 if (dev_priv->vbt.fdi_rx_polarity_inverted)
8462 val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8463 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
8464 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
8465 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
8466 I915_WRITE(TRANS_CHICKEN2(pipe), val);
8467 }
8468 /* WADP0ClockGatingDisable */
8469 for_each_pipe(dev_priv, pipe) {
8470 I915_WRITE(TRANS_CHICKEN1(pipe),
8471 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8472 }
8473 }
8474
8475 static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
8476 {
8477 uint32_t tmp;
8478
8479 tmp = I915_READ(MCH_SSKPD);
8480 if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
8481 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x. This can cause underruns.\n",
8482 tmp);
8483 }
8484
8485 static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
8486 {
8487 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8488
8489 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8490
8491 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8492 I915_READ(ILK_DISPLAY_CHICKEN2) |
8493 ILK_ELPIN_409_SELECT);
8494
8495 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
8496 I915_WRITE(_3D_CHICKEN,
8497 _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
8498
8499 /* WaDisable_RenderCache_OperationalFlush:snb */
8500 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8501
8502 /*
8503 * BSpec recommends 8x4 when MSAA is used,
8504 * however in practice 16x4 seems fastest.
8505 *
8506 * Note that PS/WM thread counts depend on the WIZ hashing
8507 * disable bit, which we don't touch here, but it's good
8508 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8509 */
8510 I915_WRITE(GEN6_GT_MODE,
8511 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8512
8513 I915_WRITE(CACHE_MODE_0,
8514 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
8515
8516 I915_WRITE(GEN6_UCGCTL1,
8517 I915_READ(GEN6_UCGCTL1) |
8518 GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
8519 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
8520
8521 /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
8522 * gating disable must be set. Failure to set it results in
8523 * flickering pixels due to Z write ordering failures after
8524 * some amount of runtime in the Mesa "fire" demo, and Unigine
8525 * Sanctuary and Tropics, and apparently anything else with
8526 * alpha test or pixel discard.
8527 *
8528 * According to the spec, bit 11 (RCCUNIT) must also be set,
8529 * but we didn't debug actual testcases to find it out.
8530 *
8531 * WaDisableRCCUnitClockGating:snb
8532 * WaDisableRCPBUnitClockGating:snb
8533 */
8534 I915_WRITE(GEN6_UCGCTL2,
8535 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
8536 GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
8537
8538 /* WaStripsFansDisableFastClipPerformanceFix:snb */
8539 I915_WRITE(_3D_CHICKEN3,
8540 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
8541
8542 /*
8543 * Bspec says:
8544 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
8545 * 3DSTATE_SF number of SF output attributes is more than 16."
8546 */
8547 I915_WRITE(_3D_CHICKEN3,
8548 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
8549
8550 /*
8551 * According to the spec the following bits should be
8552 * set in order to enable memory self-refresh and fbc:
8553 * The bit21 and bit22 of 0x42000
8554 * The bit21 and bit22 of 0x42004
8555 * The bit5 and bit7 of 0x42020
8556 * The bit14 of 0x70180
8557 * The bit14 of 0x71180
8558 *
8559 * WaFbcAsynchFlipDisableFbcQueue:snb
8560 */
8561 I915_WRITE(ILK_DISPLAY_CHICKEN1,
8562 I915_READ(ILK_DISPLAY_CHICKEN1) |
8563 ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
8564 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8565 I915_READ(ILK_DISPLAY_CHICKEN2) |
8566 ILK_DPARB_GATE | ILK_VSDPFD_FULL);
8567 I915_WRITE(ILK_DSPCLK_GATE_D,
8568 I915_READ(ILK_DSPCLK_GATE_D) |
8569 ILK_DPARBUNIT_CLOCK_GATE_ENABLE |
8570 ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
8571
8572 g4x_disable_trickle_feed(dev_priv);
8573
8574 cpt_init_clock_gating(dev_priv);
8575
8576 gen6_check_mch_setup(dev_priv);
8577 }
8578
8579 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
8580 {
8581 uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
8582
8583 /*
8584 * WaVSThreadDispatchOverride:ivb,vlv
8585 *
8586 * This actually overrides the dispatch
8587 * mode for all thread types.
8588 */
8589 reg &= ~GEN7_FF_SCHED_MASK;
8590 reg |= GEN7_FF_TS_SCHED_HW;
8591 reg |= GEN7_FF_VS_SCHED_HW;
8592 reg |= GEN7_FF_DS_SCHED_HW;
8593
8594 I915_WRITE(GEN7_FF_THREAD_MODE, reg);
8595 }
8596
8597 static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
8598 {
8599 /*
8600 * TODO: this bit should only be enabled when really needed, then
8601 * disabled when not needed anymore in order to save power.
8602 */
8603 if (HAS_PCH_LPT_LP(dev_priv))
8604 I915_WRITE(SOUTH_DSPCLK_GATE_D,
8605 I915_READ(SOUTH_DSPCLK_GATE_D) |
8606 PCH_LP_PARTITION_LEVEL_DISABLE);
8607
8608 /* WADPOClockGatingDisable:hsw */
8609 I915_WRITE(TRANS_CHICKEN1(PIPE_A),
8610 I915_READ(TRANS_CHICKEN1(PIPE_A)) |
8611 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8612 }
8613
8614 static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
8615 {
8616 if (HAS_PCH_LPT_LP(dev_priv)) {
8617 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
8618
8619 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
8620 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
8621 }
8622 }
8623
8624 static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
8625 int general_prio_credits,
8626 int high_prio_credits)
8627 {
8628 u32 misccpctl;
8629 u32 val;
8630
8631 /* WaTempDisableDOPClkGating:bdw */
8632 misccpctl = I915_READ(GEN7_MISCCPCTL);
8633 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
8634
8635 val = I915_READ(GEN8_L3SQCREG1);
8636 val &= ~L3_PRIO_CREDITS_MASK;
8637 val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
8638 val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
8639 I915_WRITE(GEN8_L3SQCREG1, val);
8640
8641 /*
8642 * Wait at least 100 clocks before re-enabling clock gating.
8643 * See the definition of L3SQCREG1 in BSpec.
8644 */
8645 POSTING_READ(GEN8_L3SQCREG1);
8646 udelay(1);
8647 I915_WRITE(GEN7_MISCCPCTL, misccpctl);
8648 }
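/*
 * For reference, the two callers below pass (30, 2) for BDW
 * (WaProgramL3SqcReg1Default:bdw) and (38, 2) for CHV, i.e. 30/38 general
 * priority credits and 2 high priority credits packed into GEN8_L3SQCREG1.
 */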
8649
8650 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
8651 {
8652 if (!HAS_PCH_CNP(dev_priv))
8653 return;
8654
8655 /* Wa #1181 */
8656 I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) |
8657 CNP_PWM_CGE_GATING_DISABLE);
8658 }
8659
8660 static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
8661 {
8662 cnp_init_clock_gating(dev_priv);
8663
8664 /* This is not a Wa. Enable for better image quality */
8665 I915_WRITE(_3D_CHICKEN3,
8666 _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
8667
8668 /* WaEnableChickenDCPR:cnl */
8669 I915_WRITE(GEN8_CHICKEN_DCPR_1,
8670 I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
8671
8672 /* WaFbcWakeMemOn:cnl */
8673 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
8674 DISP_FBC_MEMORY_WAKE);
8675
8676 /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
8677 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
8678 I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE,
8679 I915_READ(SLICE_UNIT_LEVEL_CLKGATE) |
8680 SARBUNIT_CLKGATE_DIS);
8681 }
8682
8683 static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
8684 {
8685 cnp_init_clock_gating(dev_priv);
8686 gen9_init_clock_gating(dev_priv);
8687
8688 /* WaFbcNukeOnHostModify:cfl */
8689 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8690 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8691 }
8692
8693 static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
8694 {
8695 gen9_init_clock_gating(dev_priv);
8696
8697 /* WaDisableSDEUnitClockGating:kbl */
8698 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
8699 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
8700 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
8701
8702 /* WaDisableGamClockGating:kbl */
8703 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
8704 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
8705 GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
8706
8707 /* WaFbcNukeOnHostModify:kbl */
8708 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8709 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8710 }
8711
8712 static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
8713 {
8714 gen9_init_clock_gating(dev_priv);
8715
8716 /* WAC6entrylatency:skl */
8717 I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
8718 FBC_LLC_FULLY_OPEN);
8719
8720 /* WaFbcNukeOnHostModify:skl */
8721 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8722 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8723 }
8724
8725 static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
8726 {
8727 /* The GTT cache must be disabled if the system is using 2M pages. */
8728 bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv,
8729 I915_GTT_PAGE_SIZE_2M);
8730 enum pipe pipe;
8731
8732 /* WaSwitchSolVfFArbitrationPriority:bdw */
8733 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
8734
8735 /* WaPsrDPAMaskVBlankInSRD:bdw */
8736 I915_WRITE(CHICKEN_PAR1_1,
8737 I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
8738
8739 /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
8740 for_each_pipe(dev_priv, pipe) {
8741 I915_WRITE(CHICKEN_PIPESL_1(pipe),
8742 I915_READ(CHICKEN_PIPESL_1(pipe)) |
8743 BDW_DPRS_MASK_VBLANK_SRD);
8744 }
8745
8746 /* WaVSRefCountFullforceMissDisable:bdw */
8747 /* WaDSRefCountFullforceMissDisable:bdw */
8748 I915_WRITE(GEN7_FF_THREAD_MODE,
8749 I915_READ(GEN7_FF_THREAD_MODE) &
8750 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
8751
8752 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
8753 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
8754
8755 /* WaDisableSDEUnitClockGating:bdw */
8756 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
8757 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
8758
8759 /* WaProgramL3SqcReg1Default:bdw */
8760 gen8_set_l3sqc_credits(dev_priv, 30, 2);
8761
8762 /* WaGttCachingOffByDefault:bdw */
8763 I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
8764
8765 /* WaKVMNotificationOnConfigChange:bdw */
8766 I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
8767 | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
8768
8769 lpt_init_clock_gating(dev_priv);
8770
8771 /* WaDisableDopClockGating:bdw
8772 *
8773 * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
8774 * clock gating.
8775 */
8776 I915_WRITE(GEN6_UCGCTL1,
8777 I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
8778 }
8779
8780 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
8781 {
8782 /* L3 caching of data atomics doesn't work -- disable it. */
8783 I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
8784 I915_WRITE(HSW_ROW_CHICKEN3,
8785 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
8786
8787 /* This is required by WaCatErrorRejectionIssue:hsw */
8788 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
8789 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
8790 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
8791
8792 /* WaVSRefCountFullforceMissDisable:hsw */
8793 I915_WRITE(GEN7_FF_THREAD_MODE,
8794 I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
8795
8796 /* WaDisable_RenderCache_OperationalFlush:hsw */
8797 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8798
8799 /* enable HiZ Raw Stall Optimization */
8800 I915_WRITE(CACHE_MODE_0_GEN7,
8801 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
8802
8803 /* WaDisable4x2SubspanOptimization:hsw */
8804 I915_WRITE(CACHE_MODE_1,
8805 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
8806
8807 /*
8808 * BSpec recommends 8x4 when MSAA is used,
8809 * however in practice 16x4 seems fastest.
8810 *
8811 * Note that PS/WM thread counts depend on the WIZ hashing
8812 * disable bit, which we don't touch here, but it's good
8813 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8814 */
8815 I915_WRITE(GEN7_GT_MODE,
8816 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8817
8818 /* WaSampleCChickenBitEnable:hsw */
8819 I915_WRITE(HALF_SLICE_CHICKEN3,
8820 _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
8821
8822 /* WaSwitchSolVfFArbitrationPriority:hsw */
8823 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
8824
8825 lpt_init_clock_gating(dev_priv);
8826 }
8827
8828 static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
8829 {
8830 uint32_t snpcr;
8831
8832 I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
8833
8834 /* WaDisableEarlyCull:ivb */
8835 I915_WRITE(_3D_CHICKEN3,
8836 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
8837
8838 /* WaDisableBackToBackFlipFix:ivb */
8839 I915_WRITE(IVB_CHICKEN3,
8840 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
8841 CHICKEN3_DGMG_DONE_FIX_DISABLE);
8842
8843 /* WaDisablePSDDualDispatchEnable:ivb */
8844 if (IS_IVB_GT1(dev_priv))
8845 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
8846 _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
8847
8848 /* WaDisable_RenderCache_OperationalFlush:ivb */
8849 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8850
8851 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
8852 I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
8853 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
8854
8855 /* WaApplyL3ControlAndL3ChickenMode:ivb */
8856 I915_WRITE(GEN7_L3CNTLREG1,
8857 GEN7_WA_FOR_GEN7_L3_CONTROL);
8858 I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
8859 GEN7_WA_L3_CHICKEN_MODE);
8860 if (IS_IVB_GT1(dev_priv))
8861 I915_WRITE(GEN7_ROW_CHICKEN2,
8862 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
8863 else {
8864 /* must write both registers */
8865 I915_WRITE(GEN7_ROW_CHICKEN2,
8866 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
8867 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
8868 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
8869 }
8870
8871 /* WaForceL3Serialization:ivb */
8872 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
8873 ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
8874
8875 /*
8876 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
8877 * This implements the WaDisableRCZUnitClockGating:ivb workaround.
8878 */
8879 I915_WRITE(GEN6_UCGCTL2,
8880 GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
8881
8882 /* This is required by WaCatErrorRejectionIssue:ivb */
8883 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
8884 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
8885 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
8886
8887 g4x_disable_trickle_feed(dev_priv);
8888
8889 gen7_setup_fixed_func_scheduler(dev_priv);
8890
8891 if (0) { /* causes HiZ corruption on ivb:gt1 */
8892 /* enable HiZ Raw Stall Optimization */
8893 I915_WRITE(CACHE_MODE_0_GEN7,
8894 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
8895 }
8896
8897 /* WaDisable4x2SubspanOptimization:ivb */
8898 I915_WRITE(CACHE_MODE_1,
8899 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
8900
8901 /*
8902 * BSpec recommends 8x4 when MSAA is used,
8903 * however in practice 16x4 seems fastest.
8904 *
8905 * Note that PS/WM thread counts depend on the WIZ hashing
8906 * disable bit, which we don't touch here, but it's good
8907 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8908 */
8909 I915_WRITE(GEN7_GT_MODE,
8910 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8911
8912 snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
8913 snpcr &= ~GEN6_MBC_SNPCR_MASK;
8914 snpcr |= GEN6_MBC_SNPCR_MED;
8915 I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
8916
8917 if (!HAS_PCH_NOP(dev_priv))
8918 cpt_init_clock_gating(dev_priv);
8919
8920 gen6_check_mch_setup(dev_priv);
8921 }
8922
8923 static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
8924 {
8925 /* WaDisableEarlyCull:vlv */
8926 I915_WRITE(_3D_CHICKEN3,
8927 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
8928
8929 /* WaDisableBackToBackFlipFix:vlv */
8930 I915_WRITE(IVB_CHICKEN3,
8931 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
8932 CHICKEN3_DGMG_DONE_FIX_DISABLE);
8933
8934 /* WaPsdDispatchEnable:vlv */
8935 /* WaDisablePSDDualDispatchEnable:vlv */
8936 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
8937 _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
8938 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
8939
8940 /* WaDisable_RenderCache_OperationalFlush:vlv */
8941 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8942
8943 /* WaForceL3Serialization:vlv */
8944 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
8945 ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
8946
8947 /* WaDisableDopClockGating:vlv */
8948 I915_WRITE(GEN7_ROW_CHICKEN2,
8949 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
8950
8951 /* This is required by WaCatErrorRejectionIssue:vlv */
8952 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
8953 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
8954 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
8955
8956 gen7_setup_fixed_func_scheduler(dev_priv);
8957
8958 /*
8959 * As on IVB, the spec requires bit 13 (RCZUNIT) to be set.
8960 * This implements the WaDisableRCZUnitClockGating:vlv workaround.
8961 */
8962 I915_WRITE(GEN6_UCGCTL2,
8963 GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
8964
8965 /* WaDisableL3Bank2xClockGate:vlv
8966 * Disabling L3 clock gating - MMIO 940c[25] = 1
8967 * Set bit 25 to disable L3_BANK_2x_CLK_GATING */
8968 I915_WRITE(GEN7_UCGCTL4,
8969 I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
8970
8971 /*
8972 * BSpec says this must be set, even though
8973 * WaDisable4x2SubspanOptimization isn't listed for VLV.
8974 */
8975 I915_WRITE(CACHE_MODE_1,
8976 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
8977
8978 /*
8979 * BSpec recommends 8x4 when MSAA is used,
8980 * however, in practice 16x4 seems fastest.
8981 *
8982 * Note that PS/WM thread counts depend on the WIZ hashing
8983 * disable bit, which we don't touch here, but it's good
8984 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8985 */
8986 I915_WRITE(GEN7_GT_MODE,
8987 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8988
8989 /*
8990 * WaIncreaseL3CreditsForVLVB0:vlv
8991 * This is actually the hardware default.
8992 */
8993 I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
8994
8995 /*
8996 * WaDisableVLVClockGating_VBIIssue:vlv
8997 * Disable clock gating on the GCFG unit to prevent a delay
8998 * in the reporting of vblank events.
8999 */
9000 I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
9001 }
9002
9003 static void chv_init_clock_gating(struct drm_i915_private *dev_priv)
9004 {
9005 /* WaVSRefCountFullforceMissDisable:chv */
9006 /* WaDSRefCountFullforceMissDisable:chv */
9007 I915_WRITE(GEN7_FF_THREAD_MODE,
9008 I915_READ(GEN7_FF_THREAD_MODE) &
9009 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
9010
9011 /* WaDisableSemaphoreAndSyncFlipWait:chv */
9012 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9013 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
9014
9015 /* WaDisableCSUnitClockGating:chv */
9016 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9017 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
9018
9019 /* WaDisableSDEUnitClockGating:chv */
9020 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9021 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9022
9023 /*
9024 * WaProgramL3SqcReg1Default:chv
9025 * See gfxspecs/Related Documents/Performance Guide/
9026 * LSQC Setting Recommendations.
9027 */
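	/* 38 general priority and 2 high priority credits (argument order per gen8_set_l3sqc_credits()). */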
9028 gen8_set_l3sqc_credits(dev_priv, 38, 2);
9029
9030 /*
9031 * GTT cache may not work with big pages, so if those
9032 * are ever enabled GTT cache may need to be disabled.
9033 */
9034 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
9035 }
9036
9037 static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
9038 {
9039 uint32_t dspclk_gate;
9040
9041 I915_WRITE(RENCLK_GATE_D1, 0);
9042 I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
9043 GS_UNIT_CLOCK_GATE_DISABLE |
9044 CL_UNIT_CLOCK_GATE_DISABLE);
9045 I915_WRITE(RAMCLK_GATE_D, 0);
9046 dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
9047 OVRUNIT_CLOCK_GATE_DISABLE |
9048 OVCUNIT_CLOCK_GATE_DISABLE;
9049 if (IS_GM45(dev_priv))
9050 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
9051 I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
9052
9053 /* WaDisableRenderCachePipelinedFlush */
9054 I915_WRITE(CACHE_MODE_0,
9055 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
9056
9057 /* WaDisable_RenderCache_OperationalFlush:g4x */
9058 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9059
9060 g4x_disable_trickle_feed(dev_priv);
9061 }
9062
9063 static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
9064 {
9065 I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
9066 I915_WRITE(RENCLK_GATE_D2, 0);
9067 I915_WRITE(DSPCLK_GATE_D, 0);
9068 I915_WRITE(RAMCLK_GATE_D, 0);
9069 I915_WRITE16(DEUC, 0);
9070 I915_WRITE(MI_ARB_STATE,
9071 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9072
9073 /* WaDisable_RenderCache_OperationalFlush:gen4 */
9074 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9075 }
9076
9077 static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
9078 {
9079 I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
9080 I965_RCC_CLOCK_GATE_DISABLE |
9081 I965_RCPB_CLOCK_GATE_DISABLE |
9082 I965_ISC_CLOCK_GATE_DISABLE |
9083 I965_FBC_CLOCK_GATE_DISABLE);
9084 I915_WRITE(RENCLK_GATE_D2, 0);
9085 I915_WRITE(MI_ARB_STATE,
9086 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9087
9088 /* WaDisable_RenderCache_OperationalFlush:gen4 */
9089 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9090 }
9091
9092 static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
9093 {
9094 u32 dstate = I915_READ(D_STATE);
9095
9096 dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
9097 DSTATE_DOT_CLOCK_GATING;
9098 I915_WRITE(D_STATE, dstate);
9099
9100 if (IS_PINEVIEW(dev_priv))
9101 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
9102
9103 /* IIR "flip pending" means done if this bit is set */
9104 I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
9105
9106 /* interrupts should cause a wake up from C3 */
9107 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
9108
9109 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
9110 I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
9111
9112 I915_WRITE(MI_ARB_STATE,
9113 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9114 }
9115
9116 static void i85x_init_clock_gating(struct drm_i915_private *dev_priv)
9117 {
9118 I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
9119
9120 /* interrupts should cause a wake up from C3 */
9121 I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
9122 _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
9123
9124 I915_WRITE(MEM_MODE,
9125 _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
9126 }
9127
9128 static void i830_init_clock_gating(struct drm_i915_private *dev_priv)
9129 {
9130 I915_WRITE(MEM_MODE,
9131 _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
9132 _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
9133 }
9134
9135 void intel_init_clock_gating(struct drm_i915_private *dev_priv)
9136 {
9137 dev_priv->display.init_clock_gating(dev_priv);
9138 }
9139
9140 void intel_suspend_hw(struct drm_i915_private *dev_priv)
9141 {
9142 if (HAS_PCH_LPT(dev_priv))
9143 lpt_suspend_hw(dev_priv);
9144 }
9145
9146 static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
9147 {
9148 DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
9149 }
9150
9151 /**
9152 * intel_init_clock_gating_hooks - setup the clock gating hooks
9153 * @dev_priv: device private
9154 *
9155 * Set up the hooks that configure which clocks of a given platform can be
9156 * gated and also apply various GT- and display-specific workarounds for these
9157 * platforms. Note that some GT-specific workarounds are applied separately
9158 * when GPU contexts or batchbuffers start their execution.
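 *
 * Platforms without an explicit entry below fall back to
 * nop_init_clock_gating() with a MISSING_CASE() warning.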
9159 */
9160 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
9161 {
9162 if (IS_CANNONLAKE(dev_priv))
9163 dev_priv->display.init_clock_gating = cnl_init_clock_gating;
9164 else if (IS_COFFEELAKE(dev_priv))
9165 dev_priv->display.init_clock_gating = cfl_init_clock_gating;
9166 else if (IS_SKYLAKE(dev_priv))
9167 dev_priv->display.init_clock_gating = skl_init_clock_gating;
9168 else if (IS_KABYLAKE(dev_priv))
9169 dev_priv->display.init_clock_gating = kbl_init_clock_gating;
9170 else if (IS_BROXTON(dev_priv))
9171 dev_priv->display.init_clock_gating = bxt_init_clock_gating;
9172 else if (IS_GEMINILAKE(dev_priv))
9173 dev_priv->display.init_clock_gating = glk_init_clock_gating;
9174 else if (IS_BROADWELL(dev_priv))
9175 dev_priv->display.init_clock_gating = bdw_init_clock_gating;
9176 else if (IS_CHERRYVIEW(dev_priv))
9177 dev_priv->display.init_clock_gating = chv_init_clock_gating;
9178 else if (IS_HASWELL(dev_priv))
9179 dev_priv->display.init_clock_gating = hsw_init_clock_gating;
9180 else if (IS_IVYBRIDGE(dev_priv))
9181 dev_priv->display.init_clock_gating = ivb_init_clock_gating;
9182 else if (IS_VALLEYVIEW(dev_priv))
9183 dev_priv->display.init_clock_gating = vlv_init_clock_gating;
9184 else if (IS_GEN6(dev_priv))
9185 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
9186 else if (IS_GEN5(dev_priv))
9187 dev_priv->display.init_clock_gating = ilk_init_clock_gating;
9188 else if (IS_G4X(dev_priv))
9189 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
9190 else if (IS_I965GM(dev_priv))
9191 dev_priv->display.init_clock_gating = i965gm_init_clock_gating;
9192 else if (IS_I965G(dev_priv))
9193 dev_priv->display.init_clock_gating = i965g_init_clock_gating;
9194 else if (IS_GEN3(dev_priv))
9195 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
9196 else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
9197 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
9198 else if (IS_GEN2(dev_priv))
9199 dev_priv->display.init_clock_gating = i830_init_clock_gating;
9200 else {
9201 MISSING_CASE(INTEL_DEVID(dev_priv));
9202 dev_priv->display.init_clock_gating = nop_init_clock_gating;
9203 }
9204 }
9205
9206 /* Set up chip specific power management-related functions */
9207 void intel_init_pm(struct drm_i915_private *dev_priv)
9208 {
9209 intel_fbc_init(dev_priv);
9210
9211 /* For cxsr */
9212 if (IS_PINEVIEW(dev_priv))
9213 i915_pineview_get_mem_freq(dev_priv);
9214 else if (IS_GEN5(dev_priv))
9215 i915_ironlake_get_mem_freq(dev_priv);
9216
9217 /* For FIFO watermark updates */
9218 if (INTEL_GEN(dev_priv) >= 9) {
9219 skl_setup_wm_latency(dev_priv);
9220 dev_priv->display.initial_watermarks = skl_initial_wm;
9221 dev_priv->display.atomic_update_watermarks = skl_atomic_update_crtc_wm;
9222 dev_priv->display.compute_global_watermarks = skl_compute_wm;
9223 } else if (HAS_PCH_SPLIT(dev_priv)) {
9224 ilk_setup_wm_latency(dev_priv);
9225
9226 if ((IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[1] &&
9227 dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
9228 (!IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[0] &&
9229 dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
9230 dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
9231 dev_priv->display.compute_intermediate_wm =
9232 ilk_compute_intermediate_wm;
9233 dev_priv->display.initial_watermarks =
9234 ilk_initial_watermarks;
9235 dev_priv->display.optimize_watermarks =
9236 ilk_optimize_watermarks;
9237 } else {
9238 DRM_DEBUG_KMS("Failed to read display plane latency. "
9239 "Disabling CxSR\n");
9240 }
9241 } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9242 vlv_setup_wm_latency(dev_priv);
9243 dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm;
9244 dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm;
9245 dev_priv->display.initial_watermarks = vlv_initial_watermarks;
9246 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
9247 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
9248 } else if (IS_G4X(dev_priv)) {
9249 g4x_setup_wm_latency(dev_priv);
9250 dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm;
9251 dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm;
9252 dev_priv->display.initial_watermarks = g4x_initial_watermarks;
9253 dev_priv->display.optimize_watermarks = g4x_optimize_watermarks;
9254 } else if (IS_PINEVIEW(dev_priv)) {
9255 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
9256 dev_priv->is_ddr3,
9257 dev_priv->fsb_freq,
9258 dev_priv->mem_freq)) {
9259 DRM_INFO("failed to find known CxSR latency "
9260 "(found ddr%s fsb freq %d, mem freq %d), "
9261 "disabling CxSR\n",
9262 (dev_priv->is_ddr3 == 1) ? "3" : "2",
9263 dev_priv->fsb_freq, dev_priv->mem_freq);
9264 /* Disable CxSR and never update its watermark again */
9265 intel_set_memory_cxsr(dev_priv, false);
9266 dev_priv->display.update_wm = NULL;
9267 } else
9268 dev_priv->display.update_wm = pineview_update_wm;
9269 } else if (IS_GEN4(dev_priv)) {
9270 dev_priv->display.update_wm = i965_update_wm;
9271 } else if (IS_GEN3(dev_priv)) {
9272 dev_priv->display.update_wm = i9xx_update_wm;
9273 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
9274 } else if (IS_GEN2(dev_priv)) {
9275 if (INTEL_INFO(dev_priv)->num_pipes == 1) {
9276 dev_priv->display.update_wm = i845_update_wm;
9277 dev_priv->display.get_fifo_size = i845_get_fifo_size;
9278 } else {
9279 dev_priv->display.update_wm = i9xx_update_wm;
9280 dev_priv->display.get_fifo_size = i830_get_fifo_size;
9281 }
9282 } else {
9283 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
9284 }
9285 }
9286
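/*
 * Translate the PCODE error field of the mailbox register into a negative
 * errno (0 on success). gen7+ firmware reports a slightly different set of
 * error codes than gen6, hence the two variants.
 */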
9287 static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
9288 {
9289 uint32_t flags =
9290 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9291
9292 switch (flags) {
9293 case GEN6_PCODE_SUCCESS:
9294 return 0;
9295 case GEN6_PCODE_UNIMPLEMENTED_CMD:
9296 return -ENODEV;
9297 case GEN6_PCODE_ILLEGAL_CMD:
9298 return -ENXIO;
9299 case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9300 case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9301 return -EOVERFLOW;
9302 case GEN6_PCODE_TIMEOUT:
9303 return -ETIMEDOUT;
9304 default:
9305 MISSING_CASE(flags);
9306 return 0;
9307 }
9308 }
9309
9310 static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
9311 {
9312 uint32_t flags =
9313 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9314
9315 switch (flags) {
9316 case GEN6_PCODE_SUCCESS:
9317 return 0;
9318 case GEN6_PCODE_ILLEGAL_CMD:
9319 return -ENXIO;
9320 case GEN7_PCODE_TIMEOUT:
9321 return -ETIMEDOUT;
9322 case GEN7_PCODE_ILLEGAL_DATA:
9323 return -EINVAL;
9324 case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9325 return -EOVERFLOW;
9326 default:
9327 MISSING_CASE(flags);
9328 return 0;
9329 }
9330 }
9331
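/*
 * PCODE mailbox protocol, shared by the read and write helpers below:
 * bail out with -EAGAIN if a previous request is still pending (READY set),
 * write the request data, start the transaction by writing READY together
 * with the mailbox ID, then wait for the firmware to clear READY before
 * collecting the reply and decoding the error field.
 */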
9332 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
9333 {
9334 int status;
9335
9336 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9337
9338 /* GEN6_PCODE_* are outside of the forcewake domain, so we can
9339 * use the fw I915_READ variants to reduce the amount of work
9340 * required when reading/writing.
9341 */
9342
9343 if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9344 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n",
9345 mbox, __builtin_return_address(0));
9346 return -EAGAIN;
9347 }
9348
9349 I915_WRITE_FW(GEN6_PCODE_DATA, *val);
9350 I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9351 I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9352
9353 if (__intel_wait_for_register_fw(dev_priv,
9354 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9355 500, 0, NULL)) {
9356 DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n",
9357 mbox, __builtin_return_address(0));
9358 return -ETIMEDOUT;
9359 }
9360
9361 *val = I915_READ_FW(GEN6_PCODE_DATA);
9362 I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9363
9364 if (INTEL_GEN(dev_priv) > 6)
9365 status = gen7_check_mailbox_status(dev_priv);
9366 else
9367 status = gen6_check_mailbox_status(dev_priv);
9368
9369 if (status) {
9370 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
9371 mbox, __builtin_return_address(0), status);
9372 return status;
9373 }
9374
9375 return 0;
9376 }
9377
9378 int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
9379 u32 mbox, u32 val, int timeout_us)
9380 {
9381 int status;
9382
9383 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9384
9385 /* GEN6_PCODE_* are outside of the forcewake domain, so we can
9386 * use the fw I915_READ variants to reduce the amount of work
9387 * required when reading/writing.
9388 */
9389
9390 if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9391 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
9392 val, mbox, __builtin_return_address(0));
9393 return -EAGAIN;
9394 }
9395
9396 I915_WRITE_FW(GEN6_PCODE_DATA, val);
9397 I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9398 I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9399
9400 if (__intel_wait_for_register_fw(dev_priv,
9401 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9402 timeout_us, 0, NULL)) {
9403 DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
9404 val, mbox, __builtin_return_address(0));
9405 return -ETIMEDOUT;
9406 }
9407
9408 I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9409
9410 if (INTEL_GEN(dev_priv) > 6)
9411 status = gen7_check_mailbox_status(dev_priv);
9412 else
9413 status = gen6_check_mailbox_status(dev_priv);
9414
9415 if (status) {
9416 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
9417 val, mbox, __builtin_return_address(0), status);
9418 return status;
9419 }
9420
9421 return 0;
9422 }
9423
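/*
 * Issue a single PCODE request and report whether polling can stop: true if
 * the read itself failed (error stashed in *status) or if the masked reply
 * matches the expected acknowledgment value.
 */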
9424 static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
9425 u32 request, u32 reply_mask, u32 reply,
9426 u32 *status)
9427 {
9428 u32 val = request;
9429
9430 *status = sandybridge_pcode_read(dev_priv, mbox, &val);
9431
9432 return *status || ((val & reply_mask) == reply);
9433 }
9434
9435 /**
9436 * skl_pcode_request - send PCODE request until acknowledgment
9437 * @dev_priv: device private
9438 * @mbox: PCODE mailbox ID the request is targeted for
9439 * @request: request ID
9440 * @reply_mask: mask used to check for request acknowledgment
9441 * @reply: value used to check for request acknowledgment
9442 * @timeout_base_ms: timeout for polling with preemption enabled
9443 *
9444 * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
9445 * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
9446 * The request is acknowledged once the PCODE reply dword equals @reply after
9447 * applying @reply_mask. Polling is first attempted with preemption enabled
9448 * for @timeout_base_ms; if this times out, polling is retried for another
9449 * 50 ms with preemption disabled.
9450 *
9451 * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
9452 * other error as reported by PCODE.
9453 */
9454 int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
9455 u32 reply_mask, u32 reply, int timeout_base_ms)
9456 {
9457 u32 status;
9458 int ret;
9459
9460 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9461
9462 #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
9463 &status)
9464
9465 /*
9466 * Prime the PCODE by doing a request first. Normally it guarantees
9467 * that a subsequent request, at most @timeout_base_ms later, succeeds.
9468 * _wait_for() doesn't guarantee how soon the condition it is passed gets
9469 * evaluated for the first time, so send the first request explicitly.
9470 */
9471 if (COND) {
9472 ret = 0;
9473 goto out;
9474 }
9475 ret = _wait_for(COND, timeout_base_ms * 1000, 10);
9476 if (!ret)
9477 goto out;
9478
9479 /*
9480 * The above can time out if the number of requests was low (2 in the
9481 * worst case) _and_ PCODE was busy for some reason even after a
9482 * (queued) request and @timeout_base_ms delay. As a workaround retry
9483 * the poll with preemption disabled to maximize the number of
9484 * requests. Increase the timeout from @timeout_base_ms to 50ms to
9485 * account for interrupts that could reduce the number of these
9486 * requests, and for any quirks of the PCODE firmware that delays
9487 * the request completion.
9488 */
9489 DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
9490 WARN_ON_ONCE(timeout_base_ms > 3);
9491 preempt_disable();
9492 ret = wait_for_atomic(COND, 50);
9493 preempt_enable();
9494
9495 out:
9496 return ret ? ret : status;
9497 #undef COND
9498 }
9499
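/*
 * RPS frequency encoding helpers for VLV/CHV; the per-platform formulas are
 * noted in the comments below. Assuming gpll_ref_freq is kept in kHz, the
 * *_gpu_freq() helpers return MHz and the *_freq_opcode() helpers are their
 * inverses, rounding to the nearest representable opcode.
 */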
9500 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
9501 {
9502 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9503
9504 /*
9505 * N = val - 0xb7
9506 * Slow = Fast = GPLL ref * N
9507 */
9508 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
9509 }
9510
9511 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
9512 {
9513 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9514
9515 return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
9516 }
9517
9518 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
9519 {
9520 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9521
9522 /*
9523 * N = val / 2
9524 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
9525 */
9526 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
9527 }
9528
9529 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
9530 {
9531 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9532
9533 /* CHV needs even values */
9534 return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
9535 }
9536
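/*
 * Convert an RPS opcode to MHz. Assuming GT_FREQUENCY_MULTIPLIER is 50 and
 * GEN9_FREQ_SCALER is 3, gen9+ opcodes are in 50/3 MHz units, VLV/CHV use
 * the GPLL based encodings above, and older platforms use plain 50 MHz
 * units. intel_freq_opcode() below is the inverse conversion.
 */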
9537 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
9538 {
9539 if (INTEL_GEN(dev_priv) >= 9)
9540 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
9541 GEN9_FREQ_SCALER);
9542 else if (IS_CHERRYVIEW(dev_priv))
9543 return chv_gpu_freq(dev_priv, val);
9544 else if (IS_VALLEYVIEW(dev_priv))
9545 return byt_gpu_freq(dev_priv, val);
9546 else
9547 return val * GT_FREQUENCY_MULTIPLIER;
9548 }
9549
9550 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
9551 {
9552 if (INTEL_GEN(dev_priv) >= 9)
9553 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
9554 GT_FREQUENCY_MULTIPLIER);
9555 else if (IS_CHERRYVIEW(dev_priv))
9556 return chv_freq_opcode(dev_priv, val);
9557 else if (IS_VALLEYVIEW(dev_priv))
9558 return byt_freq_opcode(dev_priv, val);
9559 else
9560 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
9561 }
9562
9563 void intel_pm_setup(struct drm_i915_private *dev_priv)
9564 {
9565 mutex_init(&dev_priv->pcu_lock);
9566
9567 INIT_DELAYED_WORK(&dev_priv->gt_pm.autoenable_work,
9568 __intel_autoenable_gt_powersave);
9569 atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
9570
9571 dev_priv->runtime_pm.suspended = false;
9572 atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
9573 }
9574
9575 static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
9576 const i915_reg_t reg)
9577 {
9578 u32 lower, upper, tmp;
9579 int loop = 2;
9580
9581 /* The registers accessed do not need forcewake. We borrow the
9582 * uncore lock to prevent concurrent access to the range register.
9583 */
9584 spin_lock_irq(&dev_priv->uncore.lock);
9585
9586 /* vlv and chv residency counters are 40 bits in width.
9587 * With a control bit, we can choose between the upper and lower
9588 * 32-bit windows into this counter.
9589 *
9590 * Although we always use the counter in high-range mode elsewhere,
9591 * userspace may attempt to read the value before rc6 is initialised,
9592 * before we have set the default VLV_COUNTER_CONTROL value. So always
9593 * set the high bit to be safe.
9594 */
9595 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9596 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9597 upper = I915_READ_FW(reg);
9598 do {
9599 tmp = upper;
9600
9601 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9602 _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
9603 lower = I915_READ_FW(reg);
9604
9605 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9606 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9607 upper = I915_READ_FW(reg);
9608 } while (upper != tmp && --loop);
9609
9610 /* Everywhere else we always use VLV_COUNTER_CONTROL with the
9611 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
9612 * now.
9613 */
9614
9615 spin_unlock_irq(&dev_priv->uncore.lock);
9616
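	/*
	 * Combining the reads as lower | (upper << 8) relies on the high window
	 * exposing bits [39:8] of the 40 bit counter and the low window bits
	 * [31:0]; the re-read loop above guards against the counter advancing
	 * between the two window reads.
	 */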
9617 return lower | (u64)upper << 8;
9618 }
9619
9620 u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
9621 const i915_reg_t reg)
9622 {
9623 u64 time_hw, units, div;
9624
9625 if (!intel_rc6_enabled())
9626 return 0;
9627
9628 intel_runtime_pm_get(dev_priv);
9629
9630 /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
9631 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9632 units = 1000;
9633 div = dev_priv->czclk_freq;
9634
9635 time_hw = vlv_residency_raw(dev_priv, reg);
9636 } else if (IS_GEN9_LP(dev_priv)) {
9637 units = 1000;
9638 div = 1200; /* 833.33ns */
9639
9640 time_hw = I915_READ(reg);
9641 } else {
9642 units = 128000; /* 1.28us */
9643 div = 100000;
9644
9645 time_hw = I915_READ(reg);
9646 }
9647
9648 intel_runtime_pm_put(dev_priv);
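	/* Scale the raw counter to microseconds: us = ticks * units / div. */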
9649 return DIV_ROUND_UP_ULL(time_hw * units, div);
9650 }